SIRIUS 7.5.0
Electronic structure library and applications
Namespaces | Classes | Functions | Variables
sirius::acc Namespace Reference

Namespace for accelerator-related functions. More...

Namespaces

namespace  blas
 User-facing interface to GPU BLAS functions.
 
namespace  blas_api
 Internal interface to accelerated BLAS functions (CUDA or ROCm).
 
namespace  cusolver
 Interface to cuSolver library.
 
namespace  lapack
 Interface to accelerated LAPACK functions.
 
namespace  rocsolver
 Interface to the ROCm eigensolver.
 

Classes

class  stream_id
 Helper class to wrap a stream id (an integer number). More...
 

Functions

int num_devices ()
 Get the number of devices. More...
 
std::vector< acc_stream_t > & streams ()
 Vector of device streams. More...
 
void stack_backtrace ()
 
void set_device_id (int id__)
 Set the GPU id. More...
 
int get_device_id ()
 Get current device ID. More...
 
acc_stream_t stream (stream_id sid__)
 Return a single device stream. More...
 
int num_streams ()
 Get number of streams. More...
 
void create_streams (int num_streams__)
 Create CUDA streams. More...
 
void destroy_streams ()
 Destroy CUDA streams. More...
 
void sync_stream (stream_id sid__)
 Synchronize a single stream. More...
 
void reset ()
 Reset device. More...
 
void sync ()
 Synchronize device. More...
 
size_t get_free_mem ()
 
void print_device_info (int device_id__, std::ostream &out__)
 
template<typename T >
void copy (T *target__, T const *source__, size_t n__)
 Copy memory inside a device. More...
 
template<typename T >
void copy (T *target__, int ld1__, T const *source__, int ld2__, int nrow__, int ncol__)
 2D copy inside a device. More...
 
template<typename T >
void copyin (T *target__, T const *source__, size_t n__)
 Copy memory from host to device. More...
 
template<typename T >
void copyin (T *target__, T const *source__, size_t n__, stream_id sid__)
 Asynchronous copy from host to device. More...
 
template<typename T >
void copyin (T *target__, int ld1__, T const *source__, int ld2__, int nrow__, int ncol__)
 2D copy to the device. More...
 
template<typename T >
void copyin (T *target__, int ld1__, T const *source__, int ld2__, int nrow__, int ncol__, stream_id sid__)
 Asynchronous 2D copy to the device. More...
 
template<typename T >
void copyout (T *target__, T const *source__, size_t n__)
 Copy memory from device to host. More...
 
template<typename T >
void copyout (T *target__, T const *source__, size_t n__, stream_id sid__)
 Asynchronous copy from device to host. More...
 
template<typename T >
void copyout (T *target__, int ld1__, T const *source__, int ld2__, int nrow__, int ncol__)
 2D copy from device to host. More...
 
template<typename T >
void copyout (T *target__, int ld1__, T const *source__, int ld2__, int nrow__, int ncol__, stream_id sid__)
 Asynchronous 2D copy from device to host. More...
 
template<typename T >
void zero (T *ptr__, size_t n__)
 Zero the device memory. More...
 
template<typename T >
void zero (T *ptr__, size_t n__, stream_id sid__)
 
template<typename T >
void zero (T *ptr__, int ld__, int nrow__, int ncol__)
 Zero the 2D block of device memory. More...
 
template<typename T >
T * allocate (size_t size__)
 Allocate memory on the GPU. More...
 
void deallocate (void *ptr__)
 Deallocate GPU memory. More...
 
template<typename T >
T * allocate_host (size_t size__)
 Allocate pinned memory on the host. More...
 
void deallocate_host (void *ptr__)
 Deallocate host memory. More...
 
void begin_range_marker (const char *label__)
 
void end_range_marker ()
 
template<typename T >
void register_host (T *ptr__, size_t size__)
 
void unregister_host (void *ptr)
 
bool check_last_error ()
 
bool check_device_ptr (void const *ptr__)
 
__device__ size_t array2D_offset (int i0, int i1, int ld0)
 
__device__ size_t array3D_offset (int i0, int i1, int i2, int ld0, int ld1)
 
__device__ size_t array4D_offset (int i0, int i1, int i2, int i3, int ld0, int ld1, int ld2)
 
__host__ __device__ int num_blocks (int length, int block_size)
 
__device__ auto add_accNumbers (double x, double y)
 
__device__ auto add_accNumbers (float x, float y)
 
__device__ auto add_accNumbers (gpu_complex_type< double > x, gpu_complex_type< double > y)
 
__device__ auto add_accNumbers (gpu_complex_type< float > x, gpu_complex_type< float > y)
 
__device__ auto sub_accNumbers (double x, double y)
 
__device__ auto sub_accNumbers (float x, float y)
 
__device__ auto sub_accNumbers (gpu_complex_type< double > x, gpu_complex_type< double > y)
 
__device__ auto sub_accNumbers (gpu_complex_type< float > x, gpu_complex_type< float > y)
 
__device__ auto make_accComplex (float x, float y)
 
__device__ auto make_accComplex (double x, double y)
 
__device__ auto mul_accNumbers (gpu_complex_type< double > x, gpu_complex_type< double > y)
 
__device__ auto mul_accNumbers (double x, gpu_complex_type< double > y)
 
__device__ auto mul_accNumbers (gpu_complex_type< float > x, gpu_complex_type< float > y)
 
__device__ auto mul_accNumbers (float x, gpu_complex_type< float > y)
 
template<typename T >
__device__ auto accZero ()
 
template<>
__device__ auto accZero< double > ()
 
template<>
__device__ auto accZero< float > ()
 
template<>
__device__ auto accZero< gpu_complex_type< double > > ()
 
template<>
__device__ auto accZero< gpu_complex_type< float > > ()
 
bool __device__ is_zero (gpu_complex_type< float > x)
 
bool __device__ is_zero (gpu_complex_type< double > x)
 
bool __device__ is_zero (float x)
 
bool __device__ is_zero (double x)
 

Variables

const double twopi = 6.2831853071795864769
 

Detailed Description

Namespace for accelerator-related functions.

Function Documentation

◆ num_devices()

int sirius::acc::num_devices ( )

Get the number of devices.

Definition at line 32 of file acc.cpp.

◆ streams()

std::vector< acc_stream_t > & sirius::acc::streams ( )

Vector of device streams.

Definition at line 50 of file acc.cpp.

◆ stack_backtrace()

void sirius::acc::stack_backtrace ( )
inline

Definition at line 146 of file acc.hpp.

◆ set_device_id()

void sirius::acc::set_device_id ( int  id__)
inline

Set the GPU id.

Definition at line 183 of file acc.hpp.

◆ get_device_id()

int sirius::acc::get_device_id ( )
inline

Get current device ID.

Definition at line 191 of file acc.hpp.

◆ stream()

acc_stream_t sirius::acc::stream ( stream_id  sid__)
inline

Return a single device stream.

Definition at line 202 of file acc.hpp.

◆ num_streams()

int sirius::acc::num_streams ( )
inline

Get number of streams.

Definition at line 209 of file acc.hpp.

◆ create_streams()

void sirius::acc::create_streams ( int  num_streams__)
inline

Create CUDA streams.

Definition at line 215 of file acc.hpp.

◆ destroy_streams()

void sirius::acc::destroy_streams ( )
inline

Destroy CUDA streams.

Definition at line 226 of file acc.hpp.

◆ sync_stream()

void sirius::acc::sync_stream ( stream_id  sid__)
inline

Synchronize a single stream.

Definition at line 234 of file acc.hpp.

◆ reset()

void sirius::acc::reset ( )
inline

Reset device.

Definition at line 240 of file acc.hpp.

◆ sync()

void sirius::acc::sync ( )
inline

Synchronize device.

Definition at line 249 of file acc.hpp.

◆ get_free_mem()

size_t sirius::acc::get_free_mem ( )
inline

Definition at line 255 of file acc.hpp.

◆ print_device_info()

void sirius::acc::print_device_info ( int  device_id__,
std::ostream &  out__ 
)
inline

Definition at line 265 of file acc.hpp.

◆ copy() [1/2]

template<typename T >
void sirius::acc::copy ( T *  target__,
T const *  source__,
size_t  n__ 
)
inline

Copy memory inside a device.

Definition at line 320 of file acc.hpp.

◆ copy() [2/2]

template<typename T >
void sirius::acc::copy ( T *  target__,
int  ld1__,
T const *  source__,
int  ld2__,
int  nrow__,
int  ncol__ 
)
inline

2D copy inside a device.

Definition at line 329 of file acc.hpp.

◆ copyin() [1/4]

template<typename T >
void sirius::acc::copyin ( T *  target__,
T const *  source__,
size_t  n__ 
)
inline

Copy memory from host to device.

Definition at line 337 of file acc.hpp.

◆ copyin() [2/4]

template<typename T >
void sirius::acc::copyin ( T *  target__,
T const *  source__,
size_t  n__,
stream_id  sid__ 
)
inline

Asynchronous copy from host to device.

Definition at line 344 of file acc.hpp.

◆ copyin() [3/4]

template<typename T >
void sirius::acc::copyin ( T *  target__,
int  ld1__,
T const *  source__,
int  ld2__,
int  nrow__,
int  ncol__ 
)
inline

2D copy to the device.

Definition at line 351 of file acc.hpp.

◆ copyin() [4/4]

template<typename T >
void sirius::acc::copyin ( T *  target__,
int  ld1__,
T const *  source__,
int  ld2__,
int  nrow__,
int  ncol__,
stream_id  sid__ 
)
inline

Asynchronous 2D copy to the device.

Definition at line 359 of file acc.hpp.

◆ copyout() [1/4]

template<typename T >
void sirius::acc::copyout ( T *  target__,
T const *  source__,
size_t  n__ 
)
inline

Copy memory from device to host.

Definition at line 367 of file acc.hpp.

◆ copyout() [2/4]

template<typename T >
void sirius::acc::copyout ( T *  target__,
T const *  source__,
size_t  n__,
stream_id  sid__ 
)
inline

Asynchronous copy from device to host.

Definition at line 374 of file acc.hpp.

◆ copyout() [3/4]

template<typename T >
void sirius::acc::copyout ( T *  target__,
int  ld1__,
T const *  source__,
int  ld2__,
int  nrow__,
int  ncol__ 
)
inline

2D copy from device to host.

Definition at line 381 of file acc.hpp.

◆ copyout() [4/4]

template<typename T >
void sirius::acc::copyout ( T *  target__,
int  ld1__,
T const *  source__,
int  ld2__,
int  nrow__,
int  ncol__,
stream_id  sid__ 
)
inline

Asynchronous 2D copy from device to host.

Definition at line 389 of file acc.hpp.

◆ zero() [1/3]

template<typename T >
void sirius::acc::zero ( T *  ptr__,
size_t  n__ 
)
inline

Zero the device memory.

Definition at line 397 of file acc.hpp.

◆ zero() [2/3]

template<typename T >
void sirius::acc::zero ( T *  ptr__,
size_t  n__,
stream_id  sid__ 
)
inline

Definition at line 403 of file acc.hpp.

◆ zero() [3/3]

template<typename T >
void sirius::acc::zero ( T *  ptr__,
int  ld__,
int  nrow__,
int  ncol__ 
)
inline

Zero the 2D block of device memory.

Definition at line 410 of file acc.hpp.

◆ allocate()

template<typename T >
T * sirius::acc::allocate ( size_t  size__)
inline

Allocate memory on the GPU.

Definition at line 417 of file acc.hpp.

◆ deallocate()

void sirius::acc::deallocate ( void *  ptr__)
inline

Deallocate GPU memory.

Definition at line 435 of file acc.hpp.

◆ allocate_host()

template<typename T >
T * sirius::acc::allocate_host ( size_t  size__)
inline

Allocate pinned memory on the host.

Definition at line 442 of file acc.hpp.

◆ deallocate_host()

void sirius::acc::deallocate_host ( void *  ptr__)
inline

Deallocate host memory.

Definition at line 454 of file acc.hpp.

◆ begin_range_marker()

void sirius::acc::begin_range_marker ( const char *  label__)
inline

Definition at line 465 of file acc.hpp.

◆ end_range_marker()

void sirius::acc::end_range_marker ( )
inline

Definition at line 470 of file acc.hpp.

◆ register_host()

template<typename T >
void sirius::acc::register_host ( T *  ptr__,
size_t  size__ 
)
inline

Definition at line 476 of file acc.hpp.

◆ unregister_host()

void sirius::acc::unregister_host ( void *  ptr)
inline

Definition at line 483 of file acc.hpp.

◆ check_last_error()

bool sirius::acc::check_last_error ( )
inline

Definition at line 488 of file acc.hpp.

◆ check_device_ptr()

bool sirius::acc::check_device_ptr ( void const *  ptr__)
inline

Definition at line 499 of file acc.hpp.

◆ array2D_offset()

__device__ size_t sirius::acc::array2D_offset ( int  i0,
int  i1,
int  ld0 
)
inline

Definition at line 54 of file acc_common.hpp.

◆ array3D_offset()

__device__ size_t sirius::acc::array3D_offset ( int  i0,
int  i1,
int  i2,
int  ld0,
int  ld1 
)
inline

Definition at line 59 of file acc_common.hpp.

◆ array4D_offset()

__device__ size_t sirius::acc::array4D_offset ( int  i0,
int  i1,
int  i2,
int  i3,
int  ld0,
int  ld1,
int  ld2 
)
inline

Definition at line 64 of file acc_common.hpp.

◆ num_blocks()

__host__ __device__ int sirius::acc::num_blocks ( int  length,
int  block_size 
)
inline

Definition at line 69 of file acc_common.hpp.

◆ add_accNumbers() [1/4]

__device__ auto sirius::acc::add_accNumbers ( double  x,
double  y 
)
inline

Definition at line 74 of file acc_common.hpp.

◆ add_accNumbers() [2/4]

__device__ auto sirius::acc::add_accNumbers ( float  x,
float  y 
)
inline

Definition at line 79 of file acc_common.hpp.

◆ add_accNumbers() [3/4]

__device__ auto sirius::acc::add_accNumbers ( gpu_complex_type< double >  x,
gpu_complex_type< double >  y 
)
inline

Definition at line 84 of file acc_common.hpp.

◆ add_accNumbers() [4/4]

__device__ auto sirius::acc::add_accNumbers ( gpu_complex_type< float >  x,
gpu_complex_type< float >  y 
)
inline

Definition at line 89 of file acc_common.hpp.

◆ sub_accNumbers() [1/4]

__device__ auto sirius::acc::sub_accNumbers ( double  x,
double  y 
)
inline

Definition at line 94 of file acc_common.hpp.

◆ sub_accNumbers() [2/4]

__device__ auto sirius::acc::sub_accNumbers ( float  x,
float  y 
)
inline

Definition at line 99 of file acc_common.hpp.

◆ sub_accNumbers() [3/4]

__device__ auto sirius::acc::sub_accNumbers ( gpu_complex_type< double >  x,
gpu_complex_type< double >  y 
)
inline

Definition at line 104 of file acc_common.hpp.

◆ sub_accNumbers() [4/4]

__device__ auto sirius::acc::sub_accNumbers ( gpu_complex_type< float >  x,
gpu_complex_type< float >  y 
)
inline

Definition at line 109 of file acc_common.hpp.

◆ make_accComplex() [1/2]

__device__ auto sirius::acc::make_accComplex ( float  x,
float  y 
)
inline

Definition at line 114 of file acc_common.hpp.

◆ make_accComplex() [2/2]

__device__ auto sirius::acc::make_accComplex ( double  x,
double  y 
)
inline

Definition at line 119 of file acc_common.hpp.

◆ mul_accNumbers() [1/4]

__device__ auto sirius::acc::mul_accNumbers ( gpu_complex_type< double >  x,
gpu_complex_type< double >  y 
)
inline

Definition at line 124 of file acc_common.hpp.

◆ mul_accNumbers() [2/4]

__device__ auto sirius::acc::mul_accNumbers ( double  x,
gpu_complex_type< double >  y 
)
inline

Definition at line 129 of file acc_common.hpp.

◆ mul_accNumbers() [3/4]

__device__ auto sirius::acc::mul_accNumbers ( gpu_complex_type< float >  x,
gpu_complex_type< float >  y 
)
inline

Definition at line 134 of file acc_common.hpp.

◆ mul_accNumbers() [4/4]

__device__ auto sirius::acc::mul_accNumbers ( float  x,
gpu_complex_type< float >  y 
)
inline

Definition at line 139 of file acc_common.hpp.

◆ accZero< double >()

template<>
__device__ auto sirius::acc::accZero< double > ( )
inline

Definition at line 148 of file acc_common.hpp.

◆ accZero< float >()

template<>
__device__ auto sirius::acc::accZero< float > ( )
inline

Definition at line 154 of file acc_common.hpp.

◆ accZero< gpu_complex_type< double > >()

template<>
__device__ auto sirius::acc::accZero< gpu_complex_type< double > > ( )
inline

Definition at line 160 of file acc_common.hpp.

◆ accZero< gpu_complex_type< float > >()

template<>
__device__ auto sirius::acc::accZero< gpu_complex_type< float > > ( )
inline

Definition at line 166 of file acc_common.hpp.

◆ is_zero() [1/4]

bool __device__ sirius::acc::is_zero ( gpu_complex_type< float >  x)
inline

Definition at line 171 of file acc_common.hpp.

◆ is_zero() [2/4]

bool __device__ sirius::acc::is_zero ( gpu_complex_type< double >  x)
inline

Definition at line 176 of file acc_common.hpp.

◆ is_zero() [3/4]

bool __device__ sirius::acc::is_zero ( float  x)
inline

Definition at line 181 of file acc_common.hpp.

◆ is_zero() [4/4]

bool __device__ sirius::acc::is_zero ( double  x)
inline

Definition at line 186 of file acc_common.hpp.

Variable Documentation

◆ twopi

const double sirius::acc::twopi = 6.2831853071795864769

Definition at line 52 of file acc_common.hpp.