xpu
|
xpu
default namespace.
More...
Classes | |
struct | dim |
struct | grid |
3d execution grid describing the number of blocks and threads of a kernel. Use 'n_blocks' or 'n_threads' to construct a grid. More... | |
class | buffer |
struct | device_image |
struct | block_size |
struct | no_smem |
struct | openmp_settings |
OpenMP settings for kernels. More... | |
class | tpos |
class | cmem |
struct | kernel |
struct | function |
struct | constant |
class | kernel_context |
class | view |
class | block_scan |
Parallel scan inside a block. More... | |
class | block_reduce |
class | block_sort |
class | block_merge |
class | exception |
struct | settings |
Settings used to initialize xpu. More... | |
class | device |
class | device_prop |
class | queue |
command queue for a device. More... | |
class | h_view |
Create a view from a buffer. Create a view from a buffer to access the underlying data on the host. The view is a lightweight wrapper around the buffer and does not own the data. If the underlying buffer can't be accessed on the host, a runtime_error is thrown. Note that no synchronization with the device is performed, so the data may be out of date. More... | |
class | ptr_prop |
Properties of a pointer. Properties of a pointer allocated with malloc_device, malloc_host or malloc_shared. More... | |
class | buffer_prop |
class | kernel_timings |
Execution times collected for a kernel. More... | |
class | timings |
Timing information collected via xpu::push_timer and xpu::pop_timer. More... | |
class | scoped_timer |
RAII wrapper for timing functions. More... | |
Enumerations | |
enum | driver_t { cpu = detail::cpu , cuda = detail::cuda , hip = detail::hip , sycl = detail::sycl } |
enum | buffer_type { buf_pinned = detail::buf_pinned , buf_device = detail::buf_device , buf_managed = detail::buf_managed , buf_io = detail::buf_io , buf_stack = detail::buf_stack } |
enum | schedule_t { schedule_static , schedule_dynamic } |
OpenMP schedule types. Used for specifying the schedule type for kernels. More... | |
enum | direction { h2d = detail::dir_h2d , d2h = detail::dir_d2h } |
enum class | mem_type { pinned = detail::mem_pinned , device = detail::mem_device , managed = detail::mem_managed , host = detail::mem_host } |
Functions | |
grid | n_blocks (dim nblocks) |
Construct a grid with the given number of blocks in each dimension. More... | |
grid | n_threads (dim nthreads) |
Construct a grid with the given number of threads in each dimension. If the number of threads is not a multiple of the block size, the grid size will be rounded up to the next multiple of the block size. More... | |
constexpr XPU_D float | pi () |
constexpr XPU_D float | pi_2 () |
constexpr XPU_D float | pi_4 () |
constexpr XPU_D float | deg_to_rad () |
constexpr XPU_D float | sqrt2 () |
XPU_D int | abs (int x) |
XPU_D float | abs (float x) |
XPU_D float | acos (float x) |
XPU_D float | acosh (float x) |
XPU_D float | acospi (float x) |
XPU_D float | asin (float x) |
XPU_D float | asinh (float x) |
XPU_D float | asinpi (float x) |
XPU_D float | atan (float x) |
XPU_D float | atan2 (float y, float x) |
XPU_D float | atanh (float x) |
XPU_D float | atanpi (float x) |
XPU_D float | atan2pi (float y, float x) |
XPU_D float | cbrt (float x) |
XPU_D float | ceil (float x) |
XPU_D float | copysign (float x, float y) |
XPU_D float | cos (float x) |
XPU_D float | cosh (float x) |
XPU_D float | cospi (float x) |
XPU_D float | erf (float x) |
XPU_D float | erfc (float x) |
XPU_D float | exp (float x) |
XPU_D float | exp2 (float x) |
XPU_D float | exp10 (float x) |
XPU_D float | expm1 (float x) |
XPU_D float | fdim (float x, float y) |
XPU_D float | floor (float x) |
XPU_D float | fma (float x, float y, float z) |
XPU_D float | fmod (float x, float y) |
XPU_D float | hypot (float x, float y) |
XPU_D int | ilogb (float x) |
XPU_D bool | isfinite (float a) |
XPU_D bool | isinf (float a) |
XPU_D bool | isnan (float a) |
XPU_D float | ldexp (float x, int exp) |
XPU_D long long int | llrint (float x) |
XPU_D long long int | llround (float x) |
XPU_D float | log (float x) |
XPU_D float | log10 (float x) |
XPU_D float | log1p (float x) |
XPU_D float | log2 (float x) |
XPU_D float | logb (float x) |
XPU_D long int | lrint (float x) |
XPU_D long int | lround (float x) |
XPU_D int | max (int a, int b) |
XPU_D unsigned int | max (unsigned int a, unsigned int b) |
XPU_D long long int | max (long long int a, long long int b) |
XPU_D unsigned long long int | max (unsigned long long int a, unsigned long long int b) |
XPU_D float | max (float a, float b) |
XPU_D int | min (int a, int b) |
XPU_D unsigned int | min (unsigned int a, unsigned int b) |
XPU_D long long int | min (long long int a, long long int b) |
XPU_D unsigned long long int | min (unsigned long long int a, unsigned long long int b) |
XPU_D float | min (float a, float b) |
XPU_D float | nan (const char *tagp) |
XPU_D float | norm3d (float a, float b, float c) |
XPU_D float | norm4d (float a, float b, float c, float d) |
XPU_D float | pow (float x, float y) |
XPU_D float | rcbrt (float x) |
XPU_D float | remainder (float x, float y) |
XPU_D float | remquo (float x, float y, int *quo) |
XPU_D float | rint (float x) |
XPU_D float | rhypot (float x, float y) |
XPU_D float | rnorm3d (float a, float b, float c) |
XPU_D float | rnorm4d (float a, float b, float c, float d) |
XPU_D float | round (float x) |
XPU_D float | rsqrt (float x) |
XPU_D bool | signbit (float a) |
XPU_D void | sincos (float x, float *sptr, float *cptr) |
XPU_D void | sincospi (float x, float *sptr, float *cptr) |
XPU_D float | sin (float x) |
XPU_D float | sinh (float x) |
XPU_D float | sinpi (float x) |
XPU_D float | sqrt (float x) |
XPU_D float | tan (float x) |
XPU_D float | tanh (float x) |
XPU_D float | tanpi (float x) |
XPU_D float | tgamma (float x) |
XPU_D float | trunc (float x) |
XPU_D int | atomic_cas (int *addr, int compare, int val) |
XPU_D unsigned int | atomic_cas (unsigned int *addr, unsigned int compare, unsigned int val) |
XPU_D float | atomic_cas (float *addr, float compare, float val) |
XPU_D int | atomic_cas_block (int *addr, int compare, int val) |
XPU_D unsigned int | atomic_cas_block (unsigned int *addr, unsigned int compare, unsigned int val) |
XPU_D float | atomic_cas_block (float *addr, float compare, float val) |
XPU_D int | atomic_add (int *addr, int val) |
XPU_D unsigned int | atomic_add (unsigned int *addr, unsigned int val) |
XPU_D float | atomic_add (float *addr, float val) |
XPU_D int | atomic_add_block (int *addr, int val) |
XPU_D unsigned int | atomic_add_block (unsigned int *addr, unsigned int val) |
XPU_D float | atomic_add_block (float *addr, float val) |
XPU_D int | atomic_sub (int *addr, int val) |
XPU_D unsigned int | atomic_sub (unsigned int *addr, unsigned int val) |
XPU_D int | atomic_sub_block (int *addr, int val) |
XPU_D unsigned int | atomic_sub_block (unsigned int *addr, unsigned int val) |
XPU_D int | atomic_and (int *addr, int val) |
XPU_D unsigned int | atomic_and (unsigned int *addr, unsigned int val) |
XPU_D int | atomic_and_block (int *addr, int val) |
XPU_D unsigned int | atomic_and_block (unsigned int *addr, unsigned int val) |
XPU_D int | atomic_or (int *addr, int val) |
XPU_D unsigned int | atomic_or (unsigned int *addr, unsigned int val) |
XPU_D int | atomic_or_block (int *addr, int val) |
XPU_D unsigned int | atomic_or_block (unsigned int *addr, unsigned int val) |
XPU_D int | atomic_xor (int *addr, int val) |
XPU_D unsigned int | atomic_xor (unsigned int *addr, unsigned int val) |
XPU_D int | atomic_xor_block (int *addr, int val) |
XPU_D unsigned int | atomic_xor_block (unsigned int *addr, unsigned int val) |
XPU_D int | float_as_int (float val) |
XPU_D float | int_as_float (int val) |
XPU_D void | barrier (tpos &) |
Sync all threads in a block. More... | |
template<typename ContextT > | |
XPU_D void | barrier (ContextT &ctx) |
Sync all threads in a block. More... | |
void | initialize (settings={}) |
Initialize xpu. More... | |
template<typename I > | |
void | preload () |
Preload the given device image. More... | |
void * | malloc_device (size_t size_bytes) |
Allocate memory on the device. More... | |
void * | malloc_pinned (size_t size_bytes) |
Allocate pinned memory on the host that can be accessed by the device. More... | |
template<typename T > | |
T * | malloc_host (size_t elems) |
Allocate pinned memory on the host that can be accessed by the device. More... | |
void * | malloc_managed (size_t size_bytes) |
Allocate memory that can be accessed by the device and the host. More... | |
void | free (void *) |
Free memory allocated with malloc_device, malloc_pinned or malloc_managed. More... | |
void | stack_alloc (size_t size) |
Allocate the stack memory on the device. More... | |
void | stack_pop (void *head=nullptr) |
Pop entries from the stack. More... | |
template<typename Kernel > | |
const char * | get_name () |
template<typename Func , typename... Args> | |
void | call (Args &&... args) |
template<typename C > | |
void | set (const typename C::data_t &symbol) |
void | push_timer (std::string_view name) |
timings | pop_timer () |
void | t_add_bytes (size_t bytes) |
template<typename Kernel > | |
void | k_add_bytes (size_t bytes) |
Variables | |
constexpr driver_t | compilation_target = XPU_DETAIL_COMPILATION_TARGET |
xpu
default namespace.
enum xpu::buffer_type |
enum xpu::direction |
enum xpu::driver_t |
|
strong |
Different types of allocated memory.
enum xpu::schedule_t |
XPU_D float xpu::abs | ( | float | x | ) |
XPU_D int xpu::abs | ( | int | x | ) |
XPU_D float xpu::acos | ( | float | x | ) |
XPU_D float xpu::acosh | ( | float | x | ) |
XPU_D float xpu::acospi | ( | float | x | ) |
XPU_D float xpu::asin | ( | float | x | ) |
XPU_D float xpu::asinh | ( | float | x | ) |
XPU_D float xpu::asinpi | ( | float | x | ) |
XPU_D float xpu::atan | ( | float | x | ) |
XPU_D float xpu::atan2 | ( | float | y, |
float | x | ||
) |
XPU_D float xpu::atan2pi | ( | float | y, |
float | x | ||
) |
XPU_D float xpu::atanh | ( | float | x | ) |
XPU_D float xpu::atanpi | ( | float | x | ) |
XPU_D float xpu::atomic_add | ( | float * | addr, |
float | val | ||
) |
XPU_D int xpu::atomic_add | ( | int * | addr, |
int | val | ||
) |
XPU_D unsigned int xpu::atomic_add | ( | unsigned int * | addr, |
unsigned int | val | ||
) |
XPU_D float xpu::atomic_add_block | ( | float * | addr, |
float | val | ||
) |
XPU_D int xpu::atomic_add_block | ( | int * | addr, |
int | val | ||
) |
XPU_D unsigned int xpu::atomic_add_block | ( | unsigned int * | addr, |
unsigned int | val | ||
) |
XPU_D int xpu::atomic_and | ( | int * | addr, |
int | val | ||
) |
XPU_D unsigned int xpu::atomic_and | ( | unsigned int * | addr, |
unsigned int | val | ||
) |
XPU_D int xpu::atomic_and_block | ( | int * | addr, |
int | val | ||
) |
XPU_D unsigned int xpu::atomic_and_block | ( | unsigned int * | addr, |
unsigned int | val | ||
) |
XPU_D float xpu::atomic_cas | ( | float * | addr, |
float | compare, | ||
float | val | ||
) |
XPU_D int xpu::atomic_cas | ( | int * | addr, |
int | compare, | ||
int | val | ||
) |
XPU_D unsigned int xpu::atomic_cas | ( | unsigned int * | addr, |
unsigned int | compare, | ||
unsigned int | val | ||
) |
XPU_D float xpu::atomic_cas_block | ( | float * | addr, |
float | compare, | ||
float | val | ||
) |
XPU_D int xpu::atomic_cas_block | ( | int * | addr, |
int | compare, | ||
int | val | ||
) |
XPU_D unsigned int xpu::atomic_cas_block | ( | unsigned int * | addr, |
unsigned int | compare, | ||
unsigned int | val | ||
) |
XPU_D int xpu::atomic_or | ( | int * | addr, |
int | val | ||
) |
XPU_D unsigned int xpu::atomic_or | ( | unsigned int * | addr, |
unsigned int | val | ||
) |
XPU_D int xpu::atomic_or_block | ( | int * | addr, |
int | val | ||
) |
XPU_D unsigned int xpu::atomic_or_block | ( | unsigned int * | addr, |
unsigned int | val | ||
) |
XPU_D int xpu::atomic_sub | ( | int * | addr, |
int | val | ||
) |
XPU_D unsigned int xpu::atomic_sub | ( | unsigned int * | addr, |
unsigned int | val | ||
) |
XPU_D int xpu::atomic_sub_block | ( | int * | addr, |
int | val | ||
) |
XPU_D unsigned int xpu::atomic_sub_block | ( | unsigned int * | addr, |
unsigned int | val | ||
) |
XPU_D int xpu::atomic_xor | ( | int * | addr, |
int | val | ||
) |
XPU_D unsigned int xpu::atomic_xor | ( | unsigned int * | addr, |
unsigned int | val | ||
) |
XPU_D int xpu::atomic_xor_block | ( | int * | addr, |
int | val | ||
) |
XPU_D unsigned int xpu::atomic_xor_block | ( | unsigned int * | addr, |
unsigned int | val | ||
) |
XPU_D void xpu::barrier | ( | ContextT & | ctx | ) |
Sync all threads in a block.
Equivalent to barrier(ctx.pos()).
void xpu::call | ( | Args &&... | args | ) |
XPU_D float xpu::cbrt | ( | float | x | ) |
XPU_D float xpu::ceil | ( | float | x | ) |
XPU_D float xpu::copysign | ( | float | x, |
float | y | ||
) |
XPU_D float xpu::cos | ( | float | x | ) |
XPU_D float xpu::cosh | ( | float | x | ) |
XPU_D float xpu::cospi | ( | float | x | ) |
|
constexpr |
XPU_D float xpu::erf | ( | float | x | ) |
XPU_D float xpu::erfc | ( | float | x | ) |
XPU_D float xpu::exp | ( | float | x | ) |
XPU_D float xpu::exp10 | ( | float | x | ) |
XPU_D float xpu::exp2 | ( | float | x | ) |
XPU_D float xpu::expm1 | ( | float | x | ) |
XPU_D float xpu::fdim | ( | float | x, |
float | y | ||
) |
XPU_D int xpu::float_as_int | ( | float | val | ) |
XPU_D float xpu::floor | ( | float | x | ) |
XPU_D float xpu::fma | ( | float | x, |
float | y, | ||
float | z | ||
) |
XPU_D float xpu::fmod | ( | float | x, |
float | y | ||
) |
|
inline |
Free memory allocated with malloc_device, malloc_pinned or malloc_managed.
ptr | Pointer to the memory to free. |
const char* xpu::get_name | ( | ) |
XPU_D float xpu::hypot | ( | float | x, |
float | y | ||
) |
XPU_D int xpu::ilogb | ( | float | x | ) |
|
inline |
Initialize xpu.
settings | Settings to use. Initializes xpu runtime with the given settings. Should be called once at the beginning of the program, before any other xpu functions are called. |
XPU_D float xpu::int_as_float | ( | int | val | ) |
XPU_D bool xpu::isfinite | ( | float | a | ) |
XPU_D bool xpu::isinf | ( | float | a | ) |
XPU_D bool xpu::isnan | ( | float | a | ) |
void xpu::k_add_bytes | ( | size_t | bytes | ) |
Add bytes of input to the given kernel. This is used to calculate the throughput.
XPU_D float xpu::ldexp | ( | float | x, |
int | exp | ||
) |
XPU_D long long int xpu::llrint | ( | float | x | ) |
XPU_D long long int xpu::llround | ( | float | x | ) |
XPU_D float xpu::log | ( | float | x | ) |
XPU_D float xpu::log10 | ( | float | x | ) |
XPU_D float xpu::log1p | ( | float | x | ) |
XPU_D float xpu::log2 | ( | float | x | ) |
XPU_D float xpu::logb | ( | float | x | ) |
XPU_D long int xpu::lrint | ( | float | x | ) |
XPU_D long int xpu::lround | ( | float | x | ) |
void* xpu::malloc_device | ( | size_t | size_bytes | ) |
Allocate memory on the device.
size_bytes | Size of the memory to allocate in bytes. |
elems | Number of elements to allocate. |
T | Type of the memory to allocate. |
T* xpu::malloc_host | ( | size_t | elems | ) |
Allocate pinned memory on the host that can be accessed by the device.
elems | Number of elements to allocate. |
T | Type of the memory to allocate. |
void* xpu::malloc_managed | ( | size_t | size_bytes | ) |
Allocate memory that can be accessed by the device and the host.
size_bytes | Size of the memory to allocate in bytes. |
elems | Number of elements to allocate. |
T | Type of the memory to allocate. |
void* xpu::malloc_pinned | ( | size_t | size_bytes | ) |
Allocate pinned memory on the host that can be accessed by the device.
size_bytes | Size of the memory to allocate in bytes. |
elems | Number of elements to allocate. |
T | Type of the memory to allocate. |
XPU_D float xpu::max | ( | float | a, |
float | b | ||
) |
XPU_D int xpu::max | ( | int | a, |
int | b | ||
) |
XPU_D long long int xpu::max | ( | long long int | a, |
long long int | b | ||
) |
XPU_D unsigned int xpu::max | ( | unsigned int | a, |
unsigned int | b | ||
) |
XPU_D unsigned long long int xpu::max | ( | unsigned long long int | a, |
unsigned long long int | b | ||
) |
XPU_D float xpu::min | ( | float | a, |
float | b | ||
) |
XPU_D int xpu::min | ( | int | a, |
int | b | ||
) |
XPU_D long long int xpu::min | ( | long long int | a, |
long long int | b | ||
) |
XPU_D unsigned int xpu::min | ( | unsigned int | a, |
unsigned int | b | ||
) |
XPU_D unsigned long long int xpu::min | ( | unsigned long long int | a, |
unsigned long long int | b | ||
) |
Construct a grid with the given number of blocks in each dimension.
Construct a grid with the given number of threads in each dimension. If the number of threads is not a multiple of the block size, the grid size will be rounded up to the next multiple of the block size.
XPU_D float xpu::nan | ( | const char * | tagp | ) |
XPU_D float xpu::norm3d | ( | float | a, |
float | b, | ||
float | c | ||
) |
XPU_D float xpu::norm4d | ( | float | a, |
float | b, | ||
float | c, | ||
float | d | ||
) |
|
constexpr |
|
constexpr |
|
constexpr |
timings xpu::pop_timer | ( | ) |
Stops the last timer started with xpu::push_timer.
XPU_D float xpu::pow | ( | float | x, |
float | y | ||
) |
void xpu::preload | ( | ) |
Preload the given device image.
I | Device image type. This call is optional. If not preloaded, the device image will be loaded automatically when the first kernel is launched. |
void xpu::push_timer | ( | std::string_view | name | ) |
Create a new timer.
XPU_D float xpu::rcbrt | ( | float | x | ) |
XPU_D float xpu::remainder | ( | float | x, |
float | y | ||
) |
XPU_D float xpu::remquo | ( | float | x, |
float | y, | ||
int * | quo | ||
) |
XPU_D float xpu::rhypot | ( | float | x, |
float | y | ||
) |
XPU_D float xpu::rint | ( | float | x | ) |
XPU_D float xpu::rnorm3d | ( | float | a, |
float | b, | ||
float | c | ||
) |
XPU_D float xpu::rnorm4d | ( | float | a, |
float | b, | ||
float | c, | ||
float | d | ||
) |
XPU_D float xpu::round | ( | float | x | ) |
XPU_D float xpu::rsqrt | ( | float | x | ) |
void xpu::set | ( | const typename C::data_t & | symbol | ) |
XPU_D bool xpu::signbit | ( | float | a | ) |
XPU_D float xpu::sin | ( | float | x | ) |
XPU_D void xpu::sincos | ( | float | x, |
float * | sptr, | ||
float * | cptr | ||
) |
XPU_D void xpu::sincospi | ( | float | x, |
float * | sptr, | ||
float * | cptr | ||
) |
XPU_D float xpu::sinh | ( | float | x | ) |
XPU_D float xpu::sinpi | ( | float | x | ) |
XPU_D float xpu::sqrt | ( | float | x | ) |
|
constexpr |
void xpu::stack_alloc | ( | size_t | size | ) |
Allocate the stack memory on the device.
void xpu::stack_pop | ( | void * | head = nullptr | ) |
Pop entries from the stack.
head | Pointer to the stack entry to pop or nullptr to pop the entire stack. |
void xpu::t_add_bytes | ( | size_t | bytes | ) |
Add bytes of input to the current timer. This is used to calculate the throughput.
XPU_D float xpu::tan | ( | float | x | ) |
XPU_D float xpu::tanh | ( | float | x | ) |
XPU_D float xpu::tanpi | ( | float | x | ) |
XPU_D float xpu::tgamma | ( | float | x | ) |
XPU_D float xpu::trunc | ( | float | x | ) |
|
inline constexpr |