xpu default namespace. More...

Classes
struct	dim

struct	grid
	3D execution grid describing the number of blocks and threads of a kernel. Use 'n_blocks' or 'n_threads' to construct a grid. More...

class	buffer

struct	device_image

struct	block_size

struct	no_smem

struct	openmp_settings
	OpenMP settings for kernels. More...

class	tpos

class	cmem

struct	kernel

struct	function

struct	constant

class	kernel_context

class	view

class	block_scan
	Parallel scan inside a block. More...

class	block_reduce

class	block_sort

class	block_merge

class	exception

struct	settings
	Settings used to initialize xpu. More...

class	device

class	device_prop

class	queue
	Command queue for a device. More...

class	h_view
	Create a view from a buffer. Create a view from a buffer to access the underlying data on the host. The view is a lightweight wrapper around the buffer and does not own the data. If the underlying buffer can't be accessed on the host, a runtime_error is thrown. Note that no synchronization with the device is performed, so the data may be out of date. More...

class	ptr_prop
	Properties of a pointer. Properties of a pointer allocated with malloc_device, malloc_host or malloc_shared. More...

class	buffer_prop

class	kernel_timings
	Execution times collected for a kernel. More...

class	timings
	Timing information collected via xpu::push_timer and xpu::pop_timer. More...

class	scoped_timer
	RAII wrapper for timing functions. More...

Enumerations
enum	driver_t { cpu = detail::cpu , cuda = detail::cuda , hip = detail::hip , sycl = detail::sycl }

enum	buffer_type { buf_pinned = detail::buf_pinned , buf_device = detail::buf_device , buf_managed = detail::buf_managed , buf_io = detail::buf_io , buf_stack = detail::buf_stack }

enum	schedule_t { schedule_static , schedule_dynamic }
	OpenMP schedule types. Used for specifying the schedule type for kernels. More...

enum	direction { h2d = detail::dir_h2d , d2h = detail::dir_d2h }

enum class	mem_type { pinned = detail::mem_pinned , device = detail::mem_device , managed = detail::mem_managed , host = detail::mem_host }

Functions
grid	n_blocks (dim nblocks)
	Construct a grid with the given number of blocks in each dimension. More...

grid	n_threads (dim nthreads)
	Construct a grid with the given number of threads in each dimension. If the number of threads is not a multiple of the block size, the grid size will be rounded up to the next multiple of the block size. More...

constexpr XPU_D float	pi ()

constexpr XPU_D float	pi_2 ()

constexpr XPU_D float	pi_4 ()

constexpr XPU_D float	deg_to_rad ()

constexpr XPU_D float	sqrt2 ()

XPU_D int	abs (int x)

XPU_D float	abs (float x)

XPU_D float	acos (float x)

XPU_D float	acosh (float x)

XPU_D float	acospi (float x)

XPU_D float	asin (float x)

XPU_D float	asinh (float x)

XPU_D float	asinpi (float x)

XPU_D float	atan (float x)

XPU_D float	atan2 (float y, float x)

XPU_D float	atanh (float x)

XPU_D float	atanpi (float x)

XPU_D float	atan2pi (float y, float x)

XPU_D float	cbrt (float x)

XPU_D float	ceil (float x)

XPU_D float	copysign (float x, float y)

XPU_D float	cos (float x)

XPU_D float	cosh (float x)

XPU_D float	cospi (float x)

XPU_D float	erf (float x)

XPU_D float	erfc (float x)

XPU_D float	exp (float x)

XPU_D float	exp2 (float x)

XPU_D float	exp10 (float x)

XPU_D float	expm1 (float x)

XPU_D float	fdim (float x, float y)

XPU_D float	floor (float x)

XPU_D float	fma (float x, float y, float z)

XPU_D float	fmod (float x, float y)

XPU_D float	hypot (float x, float y)

XPU_D int	ilogb (float x)

XPU_D bool	isfinite (float a)

XPU_D bool	isinf (float a)

XPU_D bool	isnan (float a)

XPU_D float	ldexp (float x, int exp)

XPU_D long long int	llrint (float x)

XPU_D long long int	llround (float x)

XPU_D float	log (float x)

XPU_D float	log10 (float x)

XPU_D float	log1p (float x)

XPU_D float	log2 (float x)

XPU_D float	logb (float x)

XPU_D long int	lrint (float x)

XPU_D long int	lround (float x)

XPU_D int	max (int a, int b)

XPU_D unsigned int	max (unsigned int a, unsigned int b)

XPU_D long long int	max (long long int a, long long int b)

XPU_D unsigned long long int	max (unsigned long long int a, unsigned long long int b)

XPU_D float	max (float a, float b)

XPU_D int	min (int a, int b)

XPU_D unsigned int	min (unsigned int a, unsigned int b)

XPU_D long long int	min (long long int a, long long int b)

XPU_D unsigned long long int	min (unsigned long long int a, unsigned long long int b)

XPU_D float	min (float a, float b)

XPU_D float	nan (const char *tagp)

XPU_D float	norm3d (float a, float b, float c)

XPU_D float	norm4d (float a, float b, float c, float d)

XPU_D float	pow (float x, float y)

XPU_D float	rcbrt (float x)

XPU_D float	remainder (float x, float y)

XPU_D float	remquo (float x, float y, int *quo)

XPU_D float	rint (float x)

XPU_D float	rhypot (float x, float y)

XPU_D float	rnorm3d (float a, float b, float c)

XPU_D float	rnorm4d (float a, float b, float c, float d)

XPU_D float	round (float x)

XPU_D float	rsqrt (float x)

XPU_D bool	signbit (float a)

XPU_D void	sincos (float x, float *sptr, float *cptr)

XPU_D void	sincospi (float x, float *sptr, float *cptr)

XPU_D float	sin (float x)

XPU_D float	sinh (float x)

XPU_D float	sinpi (float x)

XPU_D float	sqrt (float x)

XPU_D float	tan (float x)

XPU_D float	tanh (float x)

XPU_D float	tanpi (float x)

XPU_D float	tgamma (float x)

XPU_D float	trunc (float x)

XPU_D int	atomic_cas (int *addr, int compare, int val)

XPU_D unsigned int	atomic_cas (unsigned int *addr, unsigned int compare, unsigned int val)

XPU_D float	atomic_cas (float *addr, float compare, float val)

XPU_D int	atomic_cas_block (int *addr, int compare, int val)

XPU_D unsigned int	atomic_cas_block (unsigned int *addr, unsigned int compare, unsigned int val)

XPU_D float	atomic_cas_block (float *addr, float compare, float val)

XPU_D int	atomic_add (int *addr, int val)

XPU_D unsigned int	atomic_add (unsigned int *addr, unsigned int val)

XPU_D float	atomic_add (float *addr, float val)

XPU_D int	atomic_add_block (int *addr, int val)

XPU_D unsigned int	atomic_add_block (unsigned int *addr, unsigned int val)

XPU_D float	atomic_add_block (float *addr, float val)

XPU_D int	atomic_sub (int *addr, int val)

XPU_D unsigned int	atomic_sub (unsigned int *addr, unsigned int val)

XPU_D int	atomic_sub_block (int *addr, int val)

XPU_D unsigned int	atomic_sub_block (unsigned int *addr, unsigned int val)

XPU_D int	atomic_and (int *addr, int val)

XPU_D unsigned int	atomic_and (unsigned int *addr, unsigned int val)

XPU_D int	atomic_and_block (int *addr, int val)

XPU_D unsigned int	atomic_and_block (unsigned int *addr, unsigned int val)

XPU_D int	atomic_or (int *addr, int val)

XPU_D unsigned int	atomic_or (unsigned int *addr, unsigned int val)

XPU_D int	atomic_or_block (int *addr, int val)

XPU_D unsigned int	atomic_or_block (unsigned int *addr, unsigned int val)

XPU_D int	atomic_xor (int *addr, int val)

XPU_D unsigned int	atomic_xor (unsigned int *addr, unsigned int val)

XPU_D int	atomic_xor_block (int *addr, int val)

XPU_D unsigned int	atomic_xor_block (unsigned int *addr, unsigned int val)

XPU_D int	float_as_int (float val)

XPU_D float	int_as_float (int val)

XPU_D void	barrier (tpos &)
	Sync all threads in a block. More...

template<typename ContextT >
XPU_D void	barrier (ContextT &ctx)
	Sync all threads in a block. More...

void	initialize (settings={})
	Initialize xpu. More...

template<typename I >
void	preload ()
	Preload the given device image. More...

void *	malloc_device (size_t size_bytes)
	Allocate memory on the device. More...

void *	malloc_pinned (size_t size_bytes)
	Allocate pinned memory on the host that can be accessed by the device. More...

template<typename T >
T *	malloc_host (size_t elems)
	Allocate pinned memory on the host that can be accessed by the device. More...

void *	malloc_managed (size_t size_bytes)
	Allocate memory that can be accessed by the device and the host. More...

void	free (void *)
	Free memory allocated with malloc_device, malloc_pinned or malloc_managed. More...

void	stack_alloc (size_t size)
	Allocate the stack memory on the device. More...

void	stack_pop (void *head=nullptr)
	Pop entries from the stack. More...

template<typename Kernel >
const char *	get_name ()

template<typename Func , typename... Args>
void	call (Args &&... args)

template<typename C >
void	set (const typename C::data_t &symbol)

void	push_timer (std::string_view name)

timings	pop_timer ()

void	t_add_bytes (size_t bytes)

template<typename Kernel >
void	k_add_bytes (size_t bytes)

Variables
constexpr driver_t	compilation_target = XPU_DETAIL_COMPILATION_TARGET

Detailed Description

xpu default namespace.

Enumeration Type Documentation

◆ buffer_type

enum xpu::buffer_type

Enumerator
buf_pinned
buf_device
buf_managed
buf_io
buf_stack

◆ direction

enum xpu::direction

Enum to specify the direction of a memory transfer.

Enumerator
h2d	Host to device transfer.
d2h	Device to host transfer.

◆ driver_t

enum xpu::driver_t

Enumerator
cpu
cuda
hip
sycl

◆ mem_type

enum xpu::mem_type

strong

Different types of allocated memory.

Enumerator
pinned	Memory allocated on the host by the GPU driver. Can be accessed by the device.
device	Memory allocated on the device by the GPU driver.
managed	Memory allocated on the host and device by the GPU driver. GPU driver will synchronise data with the device when needed.
host	Host memory allocated by the user. Can't be accessed on the device. By default, all memory not in the previous categories is assumed to be host memory.

◆ schedule_t

enum xpu::schedule_t

OpenMP schedule types. Used for specifying the schedule type for kernels.

Enumerator
schedule_static
schedule_dynamic

Function Documentation

◆ abs() [1/2]

XPU_D float xpu::abs ( float x )

◆ abs() [2/2]

XPU_D int xpu::abs ( int x )

◆ acos()

XPU_D float xpu::acos ( float x )

◆ acosh()

XPU_D float xpu::acosh ( float x )

◆ acospi()

XPU_D float xpu::acospi ( float x )

◆ asin()

XPU_D float xpu::asin ( float x )

◆ asinh()

XPU_D float xpu::asinh ( float x )

◆ asinpi()

XPU_D float xpu::asinpi ( float x )

◆ atan()

XPU_D float xpu::atan ( float x )

◆ atan2()

XPU_D float xpu::atan2	(	float	y,
		float	x
	)

◆ atan2pi()

XPU_D float xpu::atan2pi	(	float	y,
		float	x
	)

◆ atanh()

XPU_D float xpu::atanh ( float x )

◆ atanpi()

XPU_D float xpu::atanpi ( float x )

◆ atomic_add() [1/3]

XPU_D float xpu::atomic_add	(	float *	addr,
		float	val
	)

◆ atomic_add() [2/3]

XPU_D int xpu::atomic_add	(	int *	addr,
		int	val
	)

◆ atomic_add() [3/3]

XPU_D unsigned int xpu::atomic_add	(	unsigned int *	addr,
		unsigned int	val
	)

◆ atomic_add_block() [1/3]

XPU_D float xpu::atomic_add_block	(	float *	addr,
		float	val
	)

◆ atomic_add_block() [2/3]

XPU_D int xpu::atomic_add_block	(	int *	addr,
		int	val
	)

◆ atomic_add_block() [3/3]

XPU_D unsigned int xpu::atomic_add_block	(	unsigned int *	addr,
		unsigned int	val
	)

◆ atomic_and() [1/2]

XPU_D int xpu::atomic_and	(	int *	addr,
		int	val
	)

◆ atomic_and() [2/2]

XPU_D unsigned int xpu::atomic_and	(	unsigned int *	addr,
		unsigned int	val
	)

◆ atomic_and_block() [1/2]

XPU_D int xpu::atomic_and_block	(	int *	addr,
		int	val
	)

◆ atomic_and_block() [2/2]

XPU_D unsigned int xpu::atomic_and_block	(	unsigned int *	addr,
		unsigned int	val
	)

◆ atomic_cas() [1/3]

XPU_D float xpu::atomic_cas	(	float *	addr,
		float	compare,
		float	val
	)

◆ atomic_cas() [2/3]

XPU_D int xpu::atomic_cas	(	int *	addr,
		int	compare,
		int	val
	)

◆ atomic_cas() [3/3]

XPU_D unsigned int xpu::atomic_cas	(	unsigned int *	addr,
		unsigned int	compare,
		unsigned int	val
	)

◆ atomic_cas_block() [1/3]

XPU_D float xpu::atomic_cas_block	(	float *	addr,
		float	compare,
		float	val
	)

◆ atomic_cas_block() [2/3]

XPU_D int xpu::atomic_cas_block	(	int *	addr,
		int	compare,
		int	val
	)

◆ atomic_cas_block() [3/3]

XPU_D unsigned int xpu::atomic_cas_block	(	unsigned int *	addr,
		unsigned int	compare,
		unsigned int	val
	)

◆ atomic_or() [1/2]

XPU_D int xpu::atomic_or	(	int *	addr,
		int	val
	)

◆ atomic_or() [2/2]

XPU_D unsigned int xpu::atomic_or	(	unsigned int *	addr,
		unsigned int	val
	)

◆ atomic_or_block() [1/2]

XPU_D int xpu::atomic_or_block	(	int *	addr,
		int	val
	)

◆ atomic_or_block() [2/2]

XPU_D unsigned int xpu::atomic_or_block	(	unsigned int *	addr,
		unsigned int	val
	)

◆ atomic_sub() [1/2]

XPU_D int xpu::atomic_sub	(	int *	addr,
		int	val
	)

◆ atomic_sub() [2/2]

XPU_D unsigned int xpu::atomic_sub	(	unsigned int *	addr,
		unsigned int	val
	)

◆ atomic_sub_block() [1/2]

XPU_D int xpu::atomic_sub_block	(	int *	addr,
		int	val
	)

◆ atomic_sub_block() [2/2]

XPU_D unsigned int xpu::atomic_sub_block	(	unsigned int *	addr,
		unsigned int	val
	)

◆ atomic_xor() [1/2]

XPU_D int xpu::atomic_xor	(	int *	addr,
		int	val
	)

◆ atomic_xor() [2/2]

XPU_D unsigned int xpu::atomic_xor	(	unsigned int *	addr,
		unsigned int	val
	)

◆ atomic_xor_block() [1/2]

XPU_D int xpu::atomic_xor_block	(	int *	addr,
		int	val
	)

◆ atomic_xor_block() [2/2]

XPU_D unsigned int xpu::atomic_xor_block	(	unsigned int *	addr,
		unsigned int	val
	)

◆ barrier() [1/2]

template<typename ContextT >

XPU_D void xpu::barrier ( ContextT & ctx )

Sync all threads in a block.

Note: This function is a shortcut for barrier(ctx.pos()).

◆ barrier() [2/2]

XPU_D void xpu::barrier ( tpos & )

Sync all threads in a block.

◆ call()

template<typename Func , typename... Args>

void xpu::call ( Args &&... args )

◆ cbrt()

XPU_D float xpu::cbrt ( float x )

◆ ceil()

XPU_D float xpu::ceil ( float x )

◆ copysign()

XPU_D float xpu::copysign	(	float	x,
		float	y
	)

◆ cos()

XPU_D float xpu::cos ( float x )

◆ cosh()

XPU_D float xpu::cosh ( float x )

◆ cospi()

XPU_D float xpu::cospi ( float x )

◆ deg_to_rad()

constexpr XPU_D float xpu::deg_to_rad ( )

constexpr

◆ erf()

XPU_D float xpu::erf ( float x )

◆ erfc()

XPU_D float xpu::erfc ( float x )

◆ exp()

XPU_D float xpu::exp ( float x )

◆ exp10()

XPU_D float xpu::exp10 ( float x )

◆ exp2()

XPU_D float xpu::exp2 ( float x )

◆ expm1()

XPU_D float xpu::expm1 ( float x )

◆ fdim()

XPU_D float xpu::fdim	(	float	x,
		float	y
	)

◆ float_as_int()

XPU_D int xpu::float_as_int ( float val )

◆ floor()

XPU_D float xpu::floor ( float x )

◆ fma()

XPU_D float xpu::fma	(	float	x,
		float	y,
		float	z
	)

◆ fmod()

XPU_D float xpu::fmod	(	float	x,
		float	y
	)

◆ free()

void xpu::free ( void * )

inline

Free memory allocated with malloc_device, malloc_pinned or malloc_managed.

Parameters

ptr	Pointer to the memory to free.

◆ get_name()

template<typename Kernel >

const char* xpu::get_name ( )

◆ hypot()

XPU_D float xpu::hypot	(	float	x,
		float	y
	)

◆ ilogb()

XPU_D int xpu::ilogb ( float x )

◆ initialize()

void xpu::initialize ( settings = {} )

inline

Initialize xpu.

Parameters

settings Settings to use. Initializes xpu runtime with the given settings. Should be called once at the beginning of the program, before any other xpu functions are called.

See also: xpu::settings

◆ int_as_float()

XPU_D float xpu::int_as_float ( int val )

◆ isfinite()

XPU_D bool xpu::isfinite ( float a )

◆ isinf()

XPU_D bool xpu::isinf ( float a )

◆ isnan()

XPU_D bool xpu::isnan ( float a )

◆ k_add_bytes()

template<typename Kernel >

void xpu::k_add_bytes ( size_t bytes )

Add bytes of input to the given kernel. This is used to calculate the throughput.

◆ ldexp()

XPU_D float xpu::ldexp	(	float	x,
		int	exp
	)

◆ llrint()

XPU_D long long int xpu::llrint ( float x )

◆ llround()

XPU_D long long int xpu::llround ( float x )

◆ log()

XPU_D float xpu::log ( float x )

◆ log10()

XPU_D float xpu::log10 ( float x )

◆ log1p()

XPU_D float xpu::log1p ( float x )

◆ log2()

XPU_D float xpu::log2 ( float x )

◆ logb()

XPU_D float xpu::logb ( float x )

◆ lrint()

XPU_D long int xpu::lrint ( float x )

◆ lround()

XPU_D long int xpu::lround ( float x )

◆ malloc_device()

void* xpu::malloc_device ( size_t size_bytes )

Allocate memory on the device.

Parameters

size_bytes	Size of the memory to allocate in bytes.
elems	Number of elements to allocate.

Template Parameters

T	Type of the memory to allocate.

Note: The memory is not initialized.

◆ malloc_host()

template<typename T >

T* xpu::malloc_host ( size_t elems )

Allocate pinned memory on the host that can be accessed by the device.

Parameters

elems Number of elements to allocate.

Template Parameters

T	Type of the memory to allocate.

Note: The memory is not initialized.

◆ malloc_managed()

void* xpu::malloc_managed ( size_t size_bytes )

Allocate memory that can be accessed by the device and the host.

Parameters

size_bytes	Size of the memory to allocate in bytes.
elems	Number of elements to allocate.

Template Parameters

T	Type of the memory to allocate.

Note: The memory is not initialized.

◆ malloc_pinned()

void* xpu::malloc_pinned ( size_t size_bytes )

Allocate pinned memory on the host that can be accessed by the device.

Parameters

size_bytes	Size of the memory to allocate in bytes.
elems	Number of elements to allocate.

Template Parameters

T	Type of the memory to allocate.

Note: The memory is not initialized.

◆ max() [1/5]

XPU_D float xpu::max	(	float	a,
		float	b
	)

◆ max() [2/5]

XPU_D int xpu::max	(	int	a,
		int	b
	)

◆ max() [3/5]

XPU_D long long int xpu::max	(	long long int	a,
		long long int	b
	)

◆ max() [4/5]

XPU_D unsigned int xpu::max	(	unsigned int	a,
		unsigned int	b
	)

◆ max() [5/5]

XPU_D unsigned long long int xpu::max	(	unsigned long long int	a,
		unsigned long long int	b
	)

◆ min() [1/5]

XPU_D float xpu::min	(	float	a,
		float	b
	)

◆ min() [2/5]

XPU_D int xpu::min	(	int	a,
		int	b
	)

◆ min() [3/5]

XPU_D long long int xpu::min	(	long long int	a,
		long long int	b
	)

◆ min() [4/5]

XPU_D unsigned int xpu::min	(	unsigned int	a,
		unsigned int	b
	)

◆ min() [5/5]

XPU_D unsigned long long int xpu::min	(	unsigned long long int	a,
		unsigned long long int	b
	)

◆ n_blocks()

grid xpu::n_blocks ( dim nblocks )

inline

Construct a grid with the given number of blocks in each dimension.

◆ n_threads()

grid xpu::n_threads ( dim nthreads )

inline

Construct a grid with the given number of threads in each dimension. If the number of threads is not a multiple of the block size, the grid size will be rounded up to the next multiple of the block size.

◆ nan()

XPU_D float xpu::nan ( const char * tagp )

◆ norm3d()

XPU_D float xpu::norm3d	(	float	a,
		float	b,
		float	c
	)

◆ norm4d()

XPU_D float xpu::norm4d	(	float	a,
		float	b,
		float	c,
		float	d
	)

◆ pi()

constexpr XPU_D float xpu::pi ( )

constexpr

◆ pi_2()

constexpr XPU_D float xpu::pi_2 ( )

constexpr

◆ pi_4()

constexpr XPU_D float xpu::pi_4 ( )

constexpr

◆ pop_timer()

timings xpu::pop_timer ( )

Stops the last timer started with xpu::push_timer.

Returns: Collected timings.

See also: xpu::push_timer, xpu::timings

◆ pow()

XPU_D float xpu::pow	(	float	x,
		float	y
	)

◆ preload()

template<typename I >

void xpu::preload ( )

Preload the given device image.

Template Parameters

I	Device image type. This call is optional. If not preloaded, the device image will be loaded automatically when the first kernel is launched.

◆ push_timer()

void xpu::push_timer ( std::string_view name )

Create a new timer.

See also: xpu::pop_timer, xpu::timings

◆ rcbrt()

XPU_D float xpu::rcbrt ( float x )

◆ remainder()

XPU_D float xpu::remainder	(	float	x,
		float	y
	)

◆ remquo()

XPU_D float xpu::remquo	(	float	x,
		float	y,
		int *	quo
	)

◆ rhypot()

XPU_D float xpu::rhypot	(	float	x,
		float	y
	)

◆ rint()

XPU_D float xpu::rint ( float x )

◆ rnorm3d()

XPU_D float xpu::rnorm3d	(	float	a,
		float	b,
		float	c
	)

◆ rnorm4d()

XPU_D float xpu::rnorm4d	(	float	a,
		float	b,
		float	c,
		float	d
	)

◆ round()

XPU_D float xpu::round ( float x )

◆ rsqrt()

XPU_D float xpu::rsqrt ( float x )

◆ set()

template<typename C >

void xpu::set ( const typename C::data_t & symbol )

◆ signbit()

XPU_D bool xpu::signbit ( float a )

◆ sin()

XPU_D float xpu::sin ( float x )

◆ sincos()

XPU_D void xpu::sincos	(	float	x,
		float *	sptr,
		float *	cptr
	)

◆ sincospi()

XPU_D void xpu::sincospi	(	float	x,
		float *	sptr,
		float *	cptr
	)

◆ sinh()

XPU_D float xpu::sinh ( float x )

◆ sinpi()

XPU_D float xpu::sinpi ( float x )

◆ sqrt()

XPU_D float xpu::sqrt ( float x )

◆ sqrt2()

constexpr XPU_D float xpu::sqrt2 ( )

constexpr

◆ stack_alloc()

void xpu::stack_alloc ( size_t size )

Allocate the stack memory on the device.

◆ stack_pop()

void xpu::stack_pop ( void * head = nullptr )

Pop entries from the stack.

Parameters

head	Pointer to the stack entry to pop or nullptr to pop the entire stack.

◆ t_add_bytes()

void xpu::t_add_bytes ( size_t bytes )

Add bytes of input to the current timer. This is used to calculate the throughput.

◆ tan()

XPU_D float xpu::tan ( float x )

◆ tanh()

XPU_D float xpu::tanh ( float x )

◆ tanpi()

XPU_D float xpu::tanpi ( float x )

◆ tgamma()

XPU_D float xpu::tgamma ( float x )

◆ trunc()

XPU_D float xpu::trunc ( float x )

Variable Documentation

◆ compilation_target

constexpr driver_t xpu::compilation_target = XPU_DETAIL_COMPILATION_TARGET

inline constexpr

Classes

Enumerations

Functions

Variables

Detailed Description

Enumeration Type Documentation

◆ buffer_type

◆ direction

◆ driver_t

◆ mem_type

◆ schedule_t

Function Documentation

◆ abs() [1/2]

◆ abs() [2/2]

◆ acos()

◆ acosh()

◆ acospi()

◆ asin()

◆ asinh()

◆ asinpi()

◆ atan()

◆ atan2()

◆ atan2pi()

◆ atanh()

◆ atanpi()

◆ atomic_add() [1/3]

◆ atomic_add() [2/3]

◆ atomic_add() [3/3]

◆ atomic_add_block() [1/3]

◆ atomic_add_block() [2/3]

◆ atomic_add_block() [3/3]

◆ atomic_and() [1/2]

◆ atomic_and() [2/2]

◆ atomic_and_block() [1/2]

◆ atomic_and_block() [2/2]

◆ atomic_cas() [1/3]

◆ atomic_cas() [2/3]

◆ atomic_cas() [3/3]

◆ atomic_cas_block() [1/3]

◆ atomic_cas_block() [2/3]

◆ atomic_cas_block() [3/3]

◆ atomic_or() [1/2]

◆ atomic_or() [2/2]

◆ atomic_or_block() [1/2]

◆ atomic_or_block() [2/2]

◆ atomic_sub() [1/2]

◆ atomic_sub() [2/2]

◆ atomic_sub_block() [1/2]

◆ atomic_sub_block() [2/2]

◆ atomic_xor() [1/2]

◆ atomic_xor() [2/2]

◆ atomic_xor_block() [1/2]

◆ atomic_xor_block() [2/2]

◆ barrier() [1/2]

◆ barrier() [2/2]

◆ call()

◆ cbrt()

◆ ceil()

◆ copysign()

◆ cos()

◆ cosh()

◆ cospi()

◆ deg_to_rad()

◆ erf()

◆ erfc()

◆ exp()

◆ exp10()

◆ exp2()

◆ expm1()

◆ fdim()

◆ float_as_int()

◆ floor()

◆ fma()

◆ fmod()

◆ free()

◆ get_name()

◆ hypot()

◆ ilogb()

◆ initialize()

◆ int_as_float()