14 #include "detail/common.h"
23 #include <string_view>
24 #include <type_traits>
39 h2d = detail::dir_h2d,
44 d2h = detail::dir_d2h,
/// Construct an exception; the `message` member is initialized from message_.
50 explicit exception(std::string_view message_) : message(message_) {}
/// Returns the stored message as a C string (message.c_str()).
/// The pointer refers to storage owned by the `message` member.
52 const char *
what() const noexcept
override {
return message.c_str(); }
85 std::function<void(std::string_view)>
logging_sink = [](std::string_view msg) {
87 std::fwrite(msg.data(), 1, msg.size(), stderr);
88 std::fputc(
'\n', stderr);
/// Get all available devices.
/// Declaration only; the definition is not part of this excerpt.
206 static std::vector<device>
all();
/// Get the device id (the `id` field of m_impl).
245 int id()
const {
return m_impl.id; }
/// Underlying detail::device record; id() reads its `id` field and impl() exposes it by reference.
258 detail::device m_impl;
/// Returns a mutable reference to the wrapped detail::device (m_impl).
/// NOTE(review): looks like an internal accessor — confirm intended audience.
265 detail::device &
impl() {
return m_impl; }
/// Get the name of the device.
/// The returned view refers to m_prop.name and is only valid while that storage lives.
284 std::string_view
name()
const {
return m_prop.name; }
/// Returns the architecture of the device, if applicable (m_prop.arch).
294 std::string_view
arch()
const {
return m_prop.arch; }
/// Returns the max grid size as three values (m_prop.max_grid_size).
/// NOTE(review): presumably one entry per grid dimension — confirm order and units.
319 std::array<size_t, 3>
max_grid_size()
const {
return m_prop.max_grid_size; }
/// Get the string used to identify the device (m_prop.xpuid).
325 std::string_view
xpuid()
const {
return m_prop.xpuid; }
/// Get the device id (m_prop.id).
330 int id()
const {
return m_prop.id; }
/// Property record backing the accessors of this class (name, arch, max_grid_size, xpuid, id).
348 detail::device_prop m_prop;
/// Copy `size` elements of type T from `from` to `to`.
/// `size` is an element count: it is multiplied by sizeof(T) before being
/// forwarded to memcpy(to, from, bytes).
371 void copy(
const T *from, T *to,
size_t size) {
memcpy(to, from, size *
sizeof(T)); }
/// Declaration only; the definition is not part of this excerpt.
/// NOTE(review): parameter names suggest copying size_bytes bytes from src to dst — confirm.
373 void memcpy(
void *dst,
const void *src,
size_t size_bytes);
/// Declaration only; the definition is not part of this excerpt.
/// NOTE(review): parameter names suggest filling size_bytes bytes at dst with `value` — confirm.
375 void memset(
void *dst,
int value,
size_t size_bytes);
380 template<
typename Kernel,
typename... Args>
/// Shared-ownership pointer to the underlying detail::queue_handle.
386 std::shared_ptr<detail::queue_handle> m_handle;
/// Declaration only; the definition is not part of this excerpt.
/// NOTE(review): `ms` is presumably an out-parameter for elapsed milliseconds,
/// and the unit of `size` (bytes vs. elements) is not visible here — confirm both.
388 void do_copy(
const void *from,
void *to,
size_t size,
double *ms);
/// Declaration only; the definition is not part of this excerpt.
/// NOTE(review): presumably emits a log message describing a copy — confirm.
389 void log_copy(
const void *from,
const void *to,
size_t size);
392 template<
typename Kernel>
395 template<
typename Func,
typename... Args>
400 void set(
const typename C::data_t &symbol);
/// Read-only pointer to the first element of the view (m_data).
429 const T *
data()
const {
return m_data; }
/// Returns the stored size of the view (m_size).
434 size_t size()
const {
return m_size; }
/// True when the view has size zero (m_size == 0).
444 bool empty()
const {
return m_size == 0; }
/// Read-only begin pointer; equal to m_data.
447 const T *
begin()
const {
return m_data; }
/// Mutable past-the-end pointer: m_data + m_size.
449 T *
end() {
return m_data + m_size; }
/// Read-only past-the-end pointer: m_data + m_size.
450 const T *
end()
const {
return m_data + m_size; }
/// Read-only reference to the last element, obtained via at(m_size - 1).
/// NOTE(review): on an empty view the index m_size - 1 is out of range; the
/// outcome then depends on how at() handles it — confirm.
456 const T &
back()
const {
return at(m_size - 1); }
/// Read-only element access by index. Declaration only; defined elsewhere.
/// NOTE(review): presumably bounds-checked, given the separate unsafe_at() — confirm.
472 const T &
at(
size_t i)
const;
/// Read-only element access that indexes m_data directly; no check of `i` is performed here.
479 const T &
unsafe_at(
size_t i)
const {
return m_data[i]; }
498 pinned = detail::mem_pinned,
503 device = detail::mem_device,
515 host = detail::mem_host,
/// Returns the pointer stored in the property record (m_prop.ptr).
536 void *
ptr()
const {
return m_prop.ptr; }
/// True when the recorded memory type is detail::mem_host or detail::mem_pinned.
547 bool is_host()
const {
return m_prop.type == detail::mem_host || m_prop.type == detail::mem_pinned; }
/// Property record backing ptr() and is_host() (fields `ptr` and `type` are read there).
561 detail::ptr_prop m_prop;
/// Returns the stored size (m_size).
/// NOTE(review): presumably an element count rather than bytes — confirm.
571 size_t size()
const {
return m_size; }
/// Returns the stored host pointer (m_host) untyped.
574 void *
h_ptr()
const {
return m_host; }
/// Returns the stored host pointer (m_host) converted to T* with static_cast.
/// NOTE(review): where T is declared is not visible in this excerpt — confirm.
576 T *
h_ptr()
const {
return static_cast<T *
>(m_host); }
/// Returns the stored device pointer (m_device) untyped.
577 void *
d_ptr()
const {
return m_device; }
/// Returns the stored device pointer (m_device) converted to T* with static_cast.
/// NOTE(review): where T is declared is not visible in this excerpt — confirm.
579 T *
d_ptr()
const {
return static_cast<T *
>(m_device); }
/// Returns the name stored with these kernel timings (m_t.name).
/// The view refers to storage inside m_t.
601 std::string_view
name()
const {
return m_t.name; }
/// Sum of all entries in m_t.times, accumulated as double starting from 0.0.
/// Yields 0.0 when no times are recorded.
606 double total()
const {
return std::accumulate(m_t.times.begin(), m_t.times.end(), 0.0); }
/// Read-only reference to the individual recorded times (m_t.times); no copy is made.
611 const std::vector<double> &
times()
const {
return m_t.times; }
/// Wrapped timing record; name(), total() and times() read its `name` and `times` fields.
620 detail::kernel_timings m_t;
/// Returns the name stored with this timing record (m_t.name).
640 std::string_view
name()
const {
return m_t.name; }
/// Returns the recorded wall time (m_t.wall).
/// NOTE(review): the time unit is not visible in this excerpt — confirm.
646 double wall()
const {
return m_t.wall; }
654 return dir ==
h2d ? m_t.copy_h2d : m_t.copy_d2h;
/// Returns the recorded memset time (m_t.memset).
/// NOTE(review): the time unit is not visible in this excerpt — confirm.
661 double memset()
const {
return m_t.memset; }
680 return std::accumulate(m_t.kernels.begin(), m_t.kernels.end(), 0.0,
681 [](
double a,
const auto &b) { return a + std::accumulate(b.times.begin(), b.times.end(), 0.0); });
/// Wrap a detail::timings value; `t` is taken by value and moved into m_t.
726 explicit timings(detail::timings t) : m_t(std::move(t)) {}
775 template<
typename Kernel>
780 #include "impl/host.tpp"
buffer_prop(const buffer< T > &)
T * d_ptr() const
Definition: host.h:579
h_view< T > view() const
Definition: host.h:582
void * d_ptr() const
Definition: host.h:577
size_t size() const
Definition: host.h:571
T * h_ptr() const
Definition: host.h:576
size_t size_bytes() const
Definition: host.h:572
buffer_type type() const
Definition: host.h:573
void * h_ptr() const
Definition: host.h:574
size_t max_threads_per_block() const
Returns the max number of threads in a block.
Definition: host.h:314
driver_t backend() const
Get the backend associated with the device.
Definition: host.h:289
size_t global_mem_available() const
Returns the amount of global memory available in bytes.
Definition: host.h:345
device_prop(device)
Query properties of the given device.
std::string_view xpuid() const
Get the string used to identify the device.
Definition: host.h:325
std::array< size_t, 3 > max_grid_size() const
Returns the max grid size in each of the three dimensions.
Definition: host.h:319
std::string_view arch() const
Returns the architecture of the device, if applicable.
Definition: host.h:294
int device_nr() const
Get the device number within the backend.
Definition: host.h:335
int id() const
Get the device id.
Definition: host.h:330
size_t const_mem_size() const
Returns the size of constant memory in bytes.
Definition: host.h:304
size_t warp_size() const
Returns the number of threads in a warp.
Definition: host.h:309
std::string_view name() const
Get the name of the device.
Definition: host.h:284
size_t shared_mem_size() const
Returns the size of shared memory per block in bytes.
Definition: host.h:299
size_t global_mem_total() const
Returns the total amount of global memory in bytes.
Definition: host.h:340
int device_nr() const
Get the device number within the backend.
Definition: host.h:255
static device active()
Get the active device.
device(driver_t driver, int device_nr)
Construct device from driver and device number.
device()
Construct CPU device.
int id() const
Get the device id.
Definition: host.h:245
device(device &&)=default
device(std::string_view xpuid)
Lookup device by string.
driver_t backend() const
Get the backend associated with the device.
Definition: host.h:250
static std::vector< device > all()
Get all available devices.
device & operator=(device &&)=default
detail::device & impl()
Definition: host.h:265
device(int id)
Construct device from device id.
device & operator=(const device &)=default
device(detail::device impl)
Definition: host.h:262
device(const device &)=default
const char * what() const noexcept override
Definition: host.h:52
exception(std::string_view message_)
Definition: host.h:50
Create a view from a buffer to access the underlying data on the host....
Definition: host.h:410
const T * end() const
Definition: host.h:450
size_t size_bytes() const
Definition: host.h:439
T & unsafe_at(size_t i)
Definition: host.h:478
const T & front() const
Definition: host.h:453
const T * data() const
Definition: host.h:429
T & back()
Definition: host.h:455
const T & operator[](size_t i) const
const T * begin() const
Definition: host.h:447
T & front()
Definition: host.h:452
T * end()
Definition: host.h:449
size_t size() const
Definition: host.h:434
bool empty() const
Definition: host.h:444
T value_type
Definition: host.h:413
T * begin()
Definition: host.h:446
const T & at(size_t i) const
const T & back() const
Definition: host.h:456
const T & unsafe_at(size_t i) const
Definition: host.h:479
h_view()
Create an empty view.
Definition: host.h:418
h_view(buffer< T > &)
Create a view from a buffer.
T * data()
Definition: host.h:428
h_view(T *data, size_t size)
Definition: host.h:487
Execution times collected for a kernel.
Definition: host.h:595
double total() const
Definition: host.h:606
std::string_view name() const
Definition: host.h:601
const std::vector< double > & times() const
Definition: host.h:611
kernel_timings(detail::kernel_timings t)
Definition: host.h:624
double throughput() const
Properties of a pointer allocated with malloc_device, malloc_host or malloc_...
Definition: host.h:522
driver_t backend() const
Definition: host.h:558
void * ptr() const
Definition: host.h:536
xpu::device device() const
Definition: host.h:552
ptr_prop(const void *)
Create a pointer property object from a pointer.
mem_type type() const
Definition: host.h:542
bool is_host() const
Definition: host.h:547
Command queue for a device.
Definition: host.h:354
void memset(buffer< T >, int value)
void launch(grid params, Args &&... args)
void copy(const T *from, T *to, size_t size)
Definition: host.h:371
void memset(void *dst, int value, size_t size_bytes)
void copy(buffer< T >, direction)
void memcpy(void *dst, const void *src, size_t size_bytes)
RAII wrapper for timing functions.
Definition: host.h:747
scoped_timer & operator=(scoped_timer &&)=delete
scoped_timer(scoped_timer &&)=delete
scoped_timer(std::string_view name, xpu::timings *t=nullptr)
scoped_timer(const scoped_timer &)=delete
scoped_timer & operator=(const scoped_timer &)=delete
Timing information collected via xpu::push_timer and xpu::pop_timer.
Definition: host.h:632
double throughput_copy(direction dir) const
timings(detail::timings t)
Definition: host.h:726
double throughput() const
std::vector< timings > children() const
std::string_view name() const
Definition: host.h:640
double kernel_time() const
Definition: host.h:679
double copy(direction dir) const
Definition: host.h:653
double throughput_memset() const
double throughput_kernels() const
double memset() const
Definition: host.h:661
void merge(const timings &other)
Definition: host.h:718
kernel_timings kernel() const
Definition: host.h:668
double wall() const
Definition: host.h:646
bool has_details() const
Definition: host.h:692
std::vector< kernel_timings > kernels() const
Common definitions for xpu.
xpu default namespace.
Definition: common.h:17
void * malloc_managed(size_t size_bytes)
Allocate memory that can be accessed by the device and the host.
void preload()
Preload the given device image.
void stack_alloc(size_t size)
Allocate the stack memory on the device.
buffer_type
Definition: common.h:77
void k_add_bytes(size_t bytes)
T * malloc_host(size_t elems)
Allocate pinned memory on the host that can be accessed by the device.
void * malloc_pinned(size_t size_bytes)
Allocate pinned memory on the host that can be accessed by the device.
void call(Args &&... args)
direction
Definition: host.h:35
@ h2d
Host to device transfer.
Definition: host.h:39
@ d2h
Device to host transfer.
Definition: host.h:44
void initialize(settings={})
Initialize xpu.
void set(const typename C::data_t &symbol)
void free(void *)
Free memory allocated with malloc_device, malloc_pinned or malloc_managed.
void push_timer(std::string_view name)
void stack_pop(void *head=nullptr)
Pop entries from the stack.
void t_add_bytes(size_t bytes)
void * malloc_device(size_t size_bytes)
Allocate memory on the device.
mem_type
Definition: host.h:493
driver_t
Definition: common.h:19
3d execution grid describing the number of blocks and threads of a kernel. Use 'n_blocks' or 'n_thread...
Definition: common.h:51
Settings used to initialize xpu.
Definition: host.h:62
bool verbose
Enable internal logging. Display information about device operations like memory allocation,...
Definition: host.h:79
std::function< void(std::string_view)> logging_sink
Set a custom logging sink. By default messages are written to stderr. Has no effect if 'verbose' is f...
Definition: host.h:85
std::vector< driver_t > excluded_backends
Backends that should be excluded.
Definition: host.h:103
bool profile
Enable profiling of kernels. Value may be overwritten by setting environment variable XPU_PROFILE.
Definition: host.h:96