xpu
xpu Namespace Reference

xpu default namespace. More...

Classes

struct  dim
 
struct  grid
 3D execution grid describing the number of blocks and threads of a kernel. Use 'n_blocks' or 'n_threads' to construct a grid. More...
 
class  buffer
 
struct  device_image
 
struct  block_size
 
struct  no_smem
 
struct  openmp_settings
 OpenMP settings for kernels. More...
 
class  tpos
 
class  cmem
 
struct  kernel
 
struct  function
 
struct  constant
 
class  kernel_context
 
class  view
 
class  block_scan
 Parallel scan inside a block. More...
 
class  block_reduce
 
class  block_sort
 
class  block_merge
 
class  exception
 
struct  settings
 Settings used to initialize xpu. More...
 
class  device
 
class  device_prop
 
class  queue
 Command queue for a device. More...
 
class  h_view
 Create a view from a buffer. Create a view from a buffer to access the underlying data on the host. The view is a lightweight wrapper around the buffer and does not own the data. If the underlying buffer can't be accessed on the host, a runtime_error is thrown. Note that no synchronization with the device is performed, so the data may be out of date. More...
 
class  ptr_prop
 Properties of a pointer. Properties of a pointer allocated with malloc_device, malloc_host or malloc_shared. More...
 
class  buffer_prop
 
class  kernel_timings
 Execution times collected for a kernel. More...
 
class  timings
 Timing information collected via xpu::push_timer and xpu::pop_timer. More...
 
class  scoped_timer
 RAII wrapper for timing functions. More...
 

Enumerations

enum  driver_t { cpu = detail::cpu , cuda = detail::cuda , hip = detail::hip , sycl = detail::sycl }
 
enum  buffer_type {
  buf_pinned = detail::buf_pinned , buf_device = detail::buf_device , buf_managed = detail::buf_managed , buf_io = detail::buf_io ,
  buf_stack = detail::buf_stack
}
 
enum  schedule_t { schedule_static , schedule_dynamic }
 OpenMP schedule types. Used for specifying the schedule type for kernels. More...
 
enum  direction { h2d = detail::dir_h2d , d2h = detail::dir_d2h }
 
enum class  mem_type { pinned = detail::mem_pinned , device = detail::mem_device , managed = detail::mem_managed , host = detail::mem_host }
 

Functions

grid n_blocks (dim nblocks)
 Construct a grid with the given number of blocks in each dimension. More...
 
grid n_threads (dim nthreads)
 Construct a grid with the given number of threads in each dimension. If the number of threads is not a multiple of the block size, the grid size will be rounded up to the next multiple of the block size. More...
 
constexpr XPU_D float pi ()
 
constexpr XPU_D float pi_2 ()
 
constexpr XPU_D float pi_4 ()
 
constexpr XPU_D float deg_to_rad ()
 
constexpr XPU_D float sqrt2 ()
 
XPU_D int abs (int x)
 
XPU_D float abs (float x)
 
XPU_D float acos (float x)
 
XPU_D float acosh (float x)
 
XPU_D float acospi (float x)
 
XPU_D float asin (float x)
 
XPU_D float asinh (float x)
 
XPU_D float asinpi (float x)
 
XPU_D float atan (float x)
 
XPU_D float atan2 (float y, float x)
 
XPU_D float atanh (float x)
 
XPU_D float atanpi (float x)
 
XPU_D float atan2pi (float y, float x)
 
XPU_D float cbrt (float x)
 
XPU_D float ceil (float x)
 
XPU_D float copysign (float x, float y)
 
XPU_D float cos (float x)
 
XPU_D float cosh (float x)
 
XPU_D float cospi (float x)
 
XPU_D float erf (float x)
 
XPU_D float erfc (float x)
 
XPU_D float exp (float x)
 
XPU_D float exp2 (float x)
 
XPU_D float exp10 (float x)
 
XPU_D float expm1 (float x)
 
XPU_D float fdim (float x, float y)
 
XPU_D float floor (float x)
 
XPU_D float fma (float x, float y, float z)
 
XPU_D float fmod (float x, float y)
 
XPU_D float hypot (float x, float y)
 
XPU_D int ilogb (float x)
 
XPU_D bool isfinite (float a)
 
XPU_D bool isinf (float a)
 
XPU_D bool isnan (float a)
 
XPU_D float ldexp (float x, int exp)
 
XPU_D long long int llrint (float x)
 
XPU_D long long int llround (float x)
 
XPU_D float log (float x)
 
XPU_D float log10 (float x)
 
XPU_D float log1p (float x)
 
XPU_D float log2 (float x)
 
XPU_D float logb (float x)
 
XPU_D long int lrint (float x)
 
XPU_D long int lround (float x)
 
XPU_D int max (int a, int b)
 
XPU_D unsigned int max (unsigned int a, unsigned int b)
 
XPU_D long long int max (long long int a, long long int b)
 
XPU_D unsigned long long int max (unsigned long long int a, unsigned long long int b)
 
XPU_D float max (float a, float b)
 
XPU_D int min (int a, int b)
 
XPU_D unsigned int min (unsigned int a, unsigned int b)
 
XPU_D long long int min (long long int a, long long int b)
 
XPU_D unsigned long long int min (unsigned long long int a, unsigned long long int b)
 
XPU_D float min (float a, float b)
 
XPU_D float nan (const char *tagp)
 
XPU_D float norm3d (float a, float b, float c)
 
XPU_D float norm4d (float a, float b, float c, float d)
 
XPU_D float pow (float x, float y)
 
XPU_D float rcbrt (float x)
 
XPU_D float remainder (float x, float y)
 
XPU_D float remquo (float x, float y, int *quo)
 
XPU_D float rint (float x)
 
XPU_D float rhypot (float x, float y)
 
XPU_D float rnorm3d (float a, float b, float c)
 
XPU_D float rnorm4d (float a, float b, float c, float d)
 
XPU_D float round (float x)
 
XPU_D float rsqrt (float x)
 
XPU_D bool signbit (float a)
 
XPU_D void sincos (float x, float *sptr, float *cptr)
 
XPU_D void sincospi (float x, float *sptr, float *cptr)
 
XPU_D float sin (float x)
 
XPU_D float sinh (float x)
 
XPU_D float sinpi (float x)
 
XPU_D float sqrt (float x)
 
XPU_D float tan (float x)
 
XPU_D float tanh (float x)
 
XPU_D float tanpi (float x)
 
XPU_D float tgamma (float x)
 
XPU_D float trunc (float x)
 
XPU_D int atomic_cas (int *addr, int compare, int val)
 
XPU_D unsigned int atomic_cas (unsigned int *addr, unsigned int compare, unsigned int val)
 
XPU_D float atomic_cas (float *addr, float compare, float val)
 
XPU_D int atomic_cas_block (int *addr, int compare, int val)
 
XPU_D unsigned int atomic_cas_block (unsigned int *addr, unsigned int compare, unsigned int val)
 
XPU_D float atomic_cas_block (float *addr, float compare, float val)
 
XPU_D int atomic_add (int *addr, int val)
 
XPU_D unsigned int atomic_add (unsigned int *addr, unsigned int val)
 
XPU_D float atomic_add (float *addr, float val)
 
XPU_D int atomic_add_block (int *addr, int val)
 
XPU_D unsigned int atomic_add_block (unsigned int *addr, unsigned int val)
 
XPU_D float atomic_add_block (float *addr, float val)
 
XPU_D int atomic_sub (int *addr, int val)
 
XPU_D unsigned int atomic_sub (unsigned int *addr, unsigned int val)
 
XPU_D int atomic_sub_block (int *addr, int val)
 
XPU_D unsigned int atomic_sub_block (unsigned int *addr, unsigned int val)
 
XPU_D int atomic_and (int *addr, int val)
 
XPU_D unsigned int atomic_and (unsigned int *addr, unsigned int val)
 
XPU_D int atomic_and_block (int *addr, int val)
 
XPU_D unsigned int atomic_and_block (unsigned int *addr, unsigned int val)
 
XPU_D int atomic_or (int *addr, int val)
 
XPU_D unsigned int atomic_or (unsigned int *addr, unsigned int val)
 
XPU_D int atomic_or_block (int *addr, int val)
 
XPU_D unsigned int atomic_or_block (unsigned int *addr, unsigned int val)
 
XPU_D int atomic_xor (int *addr, int val)
 
XPU_D unsigned int atomic_xor (unsigned int *addr, unsigned int val)
 
XPU_D int atomic_xor_block (int *addr, int val)
 
XPU_D unsigned int atomic_xor_block (unsigned int *addr, unsigned int val)
 
XPU_D int float_as_int (float val)
 
XPU_D float int_as_float (int val)
 
XPU_D void barrier (tpos &)
 Sync all threads in a block. More...
 
template<typename ContextT >
XPU_D void barrier (ContextT &ctx)
 Sync all threads in a block. More...
 
void initialize (settings={})
 Initialize xpu. More...
 
template<typename I >
void preload ()
 Preload the given device image. More...
 
void * malloc_device (size_t size_bytes)
 Allocate memory on the device. More...
 
void * malloc_pinned (size_t size_bytes)
 Allocate pinned memory on the host that can be accessed by the device. More...
 
template<typename T >
T * malloc_host (size_t elems)
 Allocate pinned memory on the host that can be accessed by the device. More...
 
void * malloc_managed (size_t size_bytes)
 Allocate memory that can be accessed by the device and the host. More...
 
void free (void *)
 Free memory allocated with malloc_device, malloc_pinned or malloc_managed. More...
 
void stack_alloc (size_t size)
 Allocate the stack memory on the device. More...
 
void stack_pop (void *head=nullptr)
 Pop entries from the stack. More...
 
template<typename Kernel >
const char * get_name ()
 
template<typename Func , typename... Args>
void call (Args &&... args)
 
template<typename C >
void set (const typename C::data_t &symbol)
 
void push_timer (std::string_view name)
 
timings pop_timer ()
 
void t_add_bytes (size_t bytes)
 
template<typename Kernel >
void k_add_bytes (size_t bytes)
 

Variables

constexpr driver_t compilation_target = XPU_DETAIL_COMPILATION_TARGET
 

Detailed Description

xpu default namespace.

Enumeration Type Documentation

◆ buffer_type

Enumerator
buf_pinned 
buf_device 
buf_managed 
buf_io 
buf_stack 

◆ direction

Enum to specify the direction of a memory transfer.

Enumerator
h2d 

Host to device transfer.

d2h 

Device to host transfer.

◆ driver_t

Enumerator
cpu 
cuda 
hip 
sycl 

◆ mem_type

enum xpu::mem_type
strong

Different types of allocated memory.

Enumerator
pinned 

Memory allocated on the host by the GPU driver. Can be accessed by the device.

device 

Memory allocated on the device by the GPU driver.

managed 

Memory allocated on the host and device by the GPU driver. GPU driver will synchronise data with the device when needed.

host 

Host memory allocated by the user. Can't be accessed on the device. By default, all memory not in the previous categories is assumed to be host memory.

◆ schedule_t

OpenMP schedule types. Used for specifying the schedule type for kernels.

Enumerator
schedule_static 
schedule_dynamic 

Function Documentation

◆ abs() [1/2]

XPU_D float xpu::abs ( float  x)

◆ abs() [2/2]

XPU_D int xpu::abs ( int  x)

◆ acos()

XPU_D float xpu::acos ( float  x)

◆ acosh()

XPU_D float xpu::acosh ( float  x)

◆ acospi()

XPU_D float xpu::acospi ( float  x)

◆ asin()

XPU_D float xpu::asin ( float  x)

◆ asinh()

XPU_D float xpu::asinh ( float  x)

◆ asinpi()

XPU_D float xpu::asinpi ( float  x)

◆ atan()

XPU_D float xpu::atan ( float  x)

◆ atan2()

XPU_D float xpu::atan2 ( float  y,
float  x 
)

◆ atan2pi()

XPU_D float xpu::atan2pi ( float  y,
float  x 
)

◆ atanh()

XPU_D float xpu::atanh ( float  x)

◆ atanpi()

XPU_D float xpu::atanpi ( float  x)

◆ atomic_add() [1/3]

XPU_D float xpu::atomic_add ( float *  addr,
float  val 
)

◆ atomic_add() [2/3]

XPU_D int xpu::atomic_add ( int *  addr,
int  val 
)

◆ atomic_add() [3/3]

XPU_D unsigned int xpu::atomic_add ( unsigned int *  addr,
unsigned int  val 
)

◆ atomic_add_block() [1/3]

XPU_D float xpu::atomic_add_block ( float *  addr,
float  val 
)

◆ atomic_add_block() [2/3]

XPU_D int xpu::atomic_add_block ( int *  addr,
int  val 
)

◆ atomic_add_block() [3/3]

XPU_D unsigned int xpu::atomic_add_block ( unsigned int *  addr,
unsigned int  val 
)

◆ atomic_and() [1/2]

XPU_D int xpu::atomic_and ( int *  addr,
int  val 
)

◆ atomic_and() [2/2]

XPU_D unsigned int xpu::atomic_and ( unsigned int *  addr,
unsigned int  val 
)

◆ atomic_and_block() [1/2]

XPU_D int xpu::atomic_and_block ( int *  addr,
int  val 
)

◆ atomic_and_block() [2/2]

XPU_D unsigned int xpu::atomic_and_block ( unsigned int *  addr,
unsigned int  val 
)

◆ atomic_cas() [1/3]

XPU_D float xpu::atomic_cas ( float *  addr,
float  compare,
float  val 
)

◆ atomic_cas() [2/3]

XPU_D int xpu::atomic_cas ( int *  addr,
int  compare,
int  val 
)

◆ atomic_cas() [3/3]

XPU_D unsigned int xpu::atomic_cas ( unsigned int *  addr,
unsigned int  compare,
unsigned int  val 
)

◆ atomic_cas_block() [1/3]

XPU_D float xpu::atomic_cas_block ( float *  addr,
float  compare,
float  val 
)

◆ atomic_cas_block() [2/3]

XPU_D int xpu::atomic_cas_block ( int *  addr,
int  compare,
int  val 
)

◆ atomic_cas_block() [3/3]

XPU_D unsigned int xpu::atomic_cas_block ( unsigned int *  addr,
unsigned int  compare,
unsigned int  val 
)

◆ atomic_or() [1/2]

XPU_D int xpu::atomic_or ( int *  addr,
int  val 
)

◆ atomic_or() [2/2]

XPU_D unsigned int xpu::atomic_or ( unsigned int *  addr,
unsigned int  val 
)

◆ atomic_or_block() [1/2]

XPU_D int xpu::atomic_or_block ( int *  addr,
int  val 
)

◆ atomic_or_block() [2/2]

XPU_D unsigned int xpu::atomic_or_block ( unsigned int *  addr,
unsigned int  val 
)

◆ atomic_sub() [1/2]

XPU_D int xpu::atomic_sub ( int *  addr,
int  val 
)

◆ atomic_sub() [2/2]

XPU_D unsigned int xpu::atomic_sub ( unsigned int *  addr,
unsigned int  val 
)

◆ atomic_sub_block() [1/2]

XPU_D int xpu::atomic_sub_block ( int *  addr,
int  val 
)

◆ atomic_sub_block() [2/2]

XPU_D unsigned int xpu::atomic_sub_block ( unsigned int *  addr,
unsigned int  val 
)

◆ atomic_xor() [1/2]

XPU_D int xpu::atomic_xor ( int *  addr,
int  val 
)

◆ atomic_xor() [2/2]

XPU_D unsigned int xpu::atomic_xor ( unsigned int *  addr,
unsigned int  val 
)

◆ atomic_xor_block() [1/2]

XPU_D int xpu::atomic_xor_block ( int *  addr,
int  val 
)

◆ atomic_xor_block() [2/2]

XPU_D unsigned int xpu::atomic_xor_block ( unsigned int *  addr,
unsigned int  val 
)

◆ barrier() [1/2]

template<typename ContextT >
XPU_D void xpu::barrier ( ContextT &  ctx)

Sync all threads in a block.

Note
This function is a shortcut for barrier(ctx.pos()).

◆ barrier() [2/2]

XPU_D void xpu::barrier ( tpos &  )

Sync all threads in a block.

◆ call()

template<typename Func , typename... Args>
void xpu::call ( Args &&...  args)

◆ cbrt()

XPU_D float xpu::cbrt ( float  x)

◆ ceil()

XPU_D float xpu::ceil ( float  x)

◆ copysign()

XPU_D float xpu::copysign ( float  x,
float  y 
)

◆ cos()

XPU_D float xpu::cos ( float  x)

◆ cosh()

XPU_D float xpu::cosh ( float  x)

◆ cospi()

XPU_D float xpu::cospi ( float  x)

◆ deg_to_rad()

constexpr XPU_D float xpu::deg_to_rad ( )
constexpr

◆ erf()

XPU_D float xpu::erf ( float  x)

◆ erfc()

XPU_D float xpu::erfc ( float  x)

◆ exp()

XPU_D float xpu::exp ( float  x)

◆ exp10()

XPU_D float xpu::exp10 ( float  x)

◆ exp2()

XPU_D float xpu::exp2 ( float  x)

◆ expm1()

XPU_D float xpu::expm1 ( float  x)

◆ fdim()

XPU_D float xpu::fdim ( float  x,
float  y 
)

◆ float_as_int()

XPU_D int xpu::float_as_int ( float  val)

◆ floor()

XPU_D float xpu::floor ( float  x)

◆ fma()

XPU_D float xpu::fma ( float  x,
float  y,
float  z 
)

◆ fmod()

XPU_D float xpu::fmod ( float  x,
float  y 
)

◆ free()

void xpu::free ( void *  )
inline

Free memory allocated with malloc_device, malloc_pinned or malloc_managed.

Parameters
ptr: Pointer to the memory to free.

◆ get_name()

template<typename Kernel >
const char* xpu::get_name ( )

◆ hypot()

XPU_D float xpu::hypot ( float  x,
float  y 
)

◆ ilogb()

XPU_D int xpu::ilogb ( float  x)

◆ initialize()

void xpu::initialize ( settings  = {})
inline

Initialize xpu.

Parameters
settings: Settings to use. Initializes the xpu runtime with the given settings. Should be called once at the beginning of the program, before any other xpu functions are called.
See also
xpu::settings

◆ int_as_float()

XPU_D float xpu::int_as_float ( int  val)

◆ isfinite()

XPU_D bool xpu::isfinite ( float  a)

◆ isinf()

XPU_D bool xpu::isinf ( float  a)

◆ isnan()

XPU_D bool xpu::isnan ( float  a)

◆ k_add_bytes()

template<typename Kernel >
void xpu::k_add_bytes ( size_t  bytes)

Add bytes of input to the given kernel. This is used to calculate the throughput.

◆ ldexp()

XPU_D float xpu::ldexp ( float  x,
int  exp 
)

◆ llrint()

XPU_D long long int xpu::llrint ( float  x)

◆ llround()

XPU_D long long int xpu::llround ( float  x)

◆ log()

XPU_D float xpu::log ( float  x)

◆ log10()

XPU_D float xpu::log10 ( float  x)

◆ log1p()

XPU_D float xpu::log1p ( float  x)

◆ log2()

XPU_D float xpu::log2 ( float  x)

◆ logb()

XPU_D float xpu::logb ( float  x)

◆ lrint()

XPU_D long int xpu::lrint ( float  x)

◆ lround()

XPU_D long int xpu::lround ( float  x)

◆ malloc_device()

void* xpu::malloc_device ( size_t  size_bytes)

Allocate memory on the device.

Parameters
size_bytes: Size of the memory to allocate in bytes.
elems: Number of elements to allocate.
Template Parameters
T: Type of the memory to allocate.
Note
The memory is not initialized.

◆ malloc_host()

template<typename T >
T* xpu::malloc_host ( size_t  elems)

Allocate pinned memory on the host that can be accessed by the device.

Parameters
elems: Number of elements to allocate.
Template Parameters
T: Type of the memory to allocate.
Note
The memory is not initialized.

◆ malloc_managed()

void* xpu::malloc_managed ( size_t  size_bytes)

Allocate memory that can be accessed by the device and the host.

Parameters
size_bytes: Size of the memory to allocate in bytes.
elems: Number of elements to allocate.
Template Parameters
T: Type of the memory to allocate.
Note
The memory is not initialized.

◆ malloc_pinned()

void* xpu::malloc_pinned ( size_t  size_bytes)

Allocate pinned memory on the host that can be accessed by the device.

Parameters
size_bytes: Size of the memory to allocate in bytes.
elems: Number of elements to allocate.
Template Parameters
T: Type of the memory to allocate.
Note
The memory is not initialized.

◆ max() [1/5]

XPU_D float xpu::max ( float  a,
float  b 
)

◆ max() [2/5]

XPU_D int xpu::max ( int  a,
int  b 
)

◆ max() [3/5]

XPU_D long long int xpu::max ( long long int  a,
long long int  b 
)

◆ max() [4/5]

XPU_D unsigned int xpu::max ( unsigned int  a,
unsigned int  b 
)

◆ max() [5/5]

XPU_D unsigned long long int xpu::max ( unsigned long long int  a,
unsigned long long int  b 
)

◆ min() [1/5]

XPU_D float xpu::min ( float  a,
float  b 
)

◆ min() [2/5]

XPU_D int xpu::min ( int  a,
int  b 
)

◆ min() [3/5]

XPU_D long long int xpu::min ( long long int  a,
long long int  b 
)

◆ min() [4/5]

XPU_D unsigned int xpu::min ( unsigned int  a,
unsigned int  b 
)

◆ min() [5/5]

XPU_D unsigned long long int xpu::min ( unsigned long long int  a,
unsigned long long int  b 
)

◆ n_blocks()

grid xpu::n_blocks ( dim  nblocks)
inline

Construct a grid with the given number of blocks in each dimension.

◆ n_threads()

grid xpu::n_threads ( dim  nthreads)
inline

Construct a grid with the given number of threads in each dimension. If the number of threads is not a multiple of the block size, the grid size will be rounded up to the next multiple of the block size.

◆ nan()

XPU_D float xpu::nan ( const char *  tagp)

◆ norm3d()

XPU_D float xpu::norm3d ( float  a,
float  b,
float  c 
)

◆ norm4d()

XPU_D float xpu::norm4d ( float  a,
float  b,
float  c,
float  d 
)

◆ pi()

constexpr XPU_D float xpu::pi ( )
constexpr

◆ pi_2()

constexpr XPU_D float xpu::pi_2 ( )
constexpr

◆ pi_4()

constexpr XPU_D float xpu::pi_4 ( )
constexpr

◆ pop_timer()

timings xpu::pop_timer ( )

Stops the last timer started with xpu::push_timer.

Returns
Collected timings.
See also
xpu::push_timer, xpu::timings

◆ pow()

XPU_D float xpu::pow ( float  x,
float  y 
)

◆ preload()

template<typename I >
void xpu::preload ( )

Preload the given device image.

Template Parameters
I: Device image type. This call is optional. If not preloaded, the device image will be loaded automatically when the first kernel is launched.

◆ push_timer()

void xpu::push_timer ( std::string_view  name)

Create a new timer.

See also
xpu::pop_timer, xpu::timings

◆ rcbrt()

XPU_D float xpu::rcbrt ( float  x)

◆ remainder()

XPU_D float xpu::remainder ( float  x,
float  y 
)

◆ remquo()

XPU_D float xpu::remquo ( float  x,
float  y,
int *  quo 
)

◆ rhypot()

XPU_D float xpu::rhypot ( float  x,
float  y 
)

◆ rint()

XPU_D float xpu::rint ( float  x)

◆ rnorm3d()

XPU_D float xpu::rnorm3d ( float  a,
float  b,
float  c 
)

◆ rnorm4d()

XPU_D float xpu::rnorm4d ( float  a,
float  b,
float  c,
float  d 
)

◆ round()

XPU_D float xpu::round ( float  x)

◆ rsqrt()

XPU_D float xpu::rsqrt ( float  x)

◆ set()

template<typename C >
void xpu::set ( const typename C::data_t &  symbol)

◆ signbit()

XPU_D bool xpu::signbit ( float  a)

◆ sin()

XPU_D float xpu::sin ( float  x)

◆ sincos()

XPU_D void xpu::sincos ( float  x,
float *  sptr,
float *  cptr 
)

◆ sincospi()

XPU_D void xpu::sincospi ( float  x,
float *  sptr,
float *  cptr 
)

◆ sinh()

XPU_D float xpu::sinh ( float  x)

◆ sinpi()

XPU_D float xpu::sinpi ( float  x)

◆ sqrt()

XPU_D float xpu::sqrt ( float  x)

◆ sqrt2()

constexpr XPU_D float xpu::sqrt2 ( )
constexpr

◆ stack_alloc()

void xpu::stack_alloc ( size_t  size)

Allocate the stack memory on the device.

◆ stack_pop()

void xpu::stack_pop ( void *  head = nullptr)

Pop entries from the stack.

Parameters
head: Pointer to the stack entry to pop, or nullptr to pop the entire stack.

◆ t_add_bytes()

void xpu::t_add_bytes ( size_t  bytes)

Add bytes of input to the current timer. This is used to calculate the throughput.

◆ tan()

XPU_D float xpu::tan ( float  x)

◆ tanh()

XPU_D float xpu::tanh ( float  x)

◆ tanpi()

XPU_D float xpu::tanpi ( float  x)

◆ tgamma()

XPU_D float xpu::tgamma ( float  x)

◆ trunc()

XPU_D float xpu::trunc ( float  x)

Variable Documentation

◆ compilation_target

constexpr driver_t xpu::compilation_target = XPU_DETAIL_COMPILATION_TARGET
inline constexpr