SIRIUS 7.5.0
Electronic structure library and applications
Functions
residuals_aux.cu File Reference

CUDA kernel to compute wave-function residuals on GPUs. More...

Go to the source code of this file.

Functions

template<typename T >
__global__ void compute_residuals_gpu_kernel (int const num_rows_loc__, T const *eval__, gpu_complex_type< T > const *hpsi__, gpu_complex_type< T > const *opsi__, gpu_complex_type< T > *res__)
 
template<>
__global__ void compute_residuals_gpu_kernel< double > (int const num_rows_loc__, double const *eval__, acc_complex_double_t const *hpsi__, acc_complex_double_t const *opsi__, acc_complex_double_t *res__)
 
template<>
__global__ void compute_residuals_gpu_kernel< float > (int const num_rows_loc__, float const *eval__, acc_complex_float_t const *hpsi__, acc_complex_float_t const *opsi__, acc_complex_float_t *res__)
 
void compute_residuals_gpu_double (acc_complex_double_t *hpsi__, acc_complex_double_t *opsi__, acc_complex_double_t *res__, int num_rows_loc__, int num_bands__, double *eval__)
 
void compute_residuals_gpu_float (acc_complex_float_t *hpsi__, acc_complex_float_t *opsi__, acc_complex_float_t *res__, int num_rows_loc__, int num_bands__, float *eval__)
 
template<typename T >
__global__ void add_square_sum_gpu_kernel (int num_rows_loc__, gpu_complex_type< T > const *wf__, int reduced__, int mpi_rank__, T *result__)
 
void add_square_sum_gpu_double (acc_complex_double_t *wf__, int num_rows_loc__, int nwf__, int reduced__, int mpi_rank__, double *result__)
 
void add_square_sum_gpu_float (acc_complex_float_t *wf__, int num_rows_loc__, int nwf__, int reduced__, int mpi_rank__, float *result__)
 
template<typename T , typename F >
static __device__ std::enable_if_t< std::is_scalar< F >::value, F > inner_diag_local_aux (gpu_complex_type< T > z1__, gpu_complex_type< T > z2__)
 
template<typename T , typename F >
static __device__ std::enable_if_t<!std::is_scalar< F >::value, F > inner_diag_local_aux (gpu_complex_type< T > z1__, gpu_complex_type< T > z2__)
 For complex-type F (complex<double> or complex<float>). More...
 
template<typename T , typename F >
__global__ void inner_diag_local_gpu_kernel (gpu_complex_type< T > const *wf1__, int ld1__, gpu_complex_type< T > const *wf2__, int ld2__, int ngv_loc__, int reduced__, F *result__)
 
void inner_diag_local_gpu_double_complex_double (gpu_complex_type< double > *wf1__, int ld1__, gpu_complex_type< double > *wf2__, int ld2__, int ngv_loc__, int nwf__, gpu_complex_type< double > *result__)
 
void inner_diag_local_gpu_double_double (gpu_complex_type< double > *wf1__, int ld1__, gpu_complex_type< double > *wf2__, int ld2__, int ngv_loc__, int nwf__, int reduced__, double *result__)
 
template<typename T >
__global__ void apply_preconditioner_gpu_kernel (int const num_rows_loc__, T const *eval__, T const *h_diag__, T const *o_diag__, gpu_complex_type< T > *res__)
 
template<>
__global__ void apply_preconditioner_gpu_kernel< double > (int const num_rows_loc__, double const *eval__, double const *h_diag__, double const *o_diag__, acc_complex_double_t *res__)
 
template<>
__global__ void apply_preconditioner_gpu_kernel< float > (int const num_rows_loc__, float const *eval__, float const *h_diag__, float const *o_diag__, acc_complex_float_t *res__)
 
void apply_preconditioner_gpu_double (acc_complex_double_t *res__, int num_rows_loc__, int num_bands__, double *eval__, const double *h_diag__, const double *o_diag__)
 
void apply_preconditioner_gpu_float (acc_complex_float_t *res__, int num_rows_loc__, int num_bands__, float *eval__, const float *h_diag__, const float *o_diag__)
 
template<typename T >
__global__ void make_real_g0_gpu_kernel (gpu_complex_type< T > *res__, int ld__)
 
template<>
__global__ void make_real_g0_gpu_kernel< double > (acc_complex_double_t *res__, int ld__)
 
template<>
__global__ void make_real_g0_gpu_kernel< float > (acc_complex_float_t *res__, int ld__)
 
void make_real_g0_gpu_double (acc_complex_double_t *res__, int ld__, int n__)
 
void make_real_g0_gpu_float (acc_complex_float_t *res__, int ld__, int n__)
 
template<typename T , typename F >
__global__ void axpby_gpu_kernel (F const *beta__, gpu_complex_type< T > *y__, int ld2__, int ngv_loc__)
 
template<typename T , typename F >
__global__ void axpby_gpu_kernel (F const *alpha__, gpu_complex_type< T > const *x__, int ld1__, F const *beta__, gpu_complex_type< T > *y__, int ld2__, int ngv_loc__)
 
template<typename T , typename F >
__global__ void axpy_scatter_gpu_kernel (F const *alpha__, gpu_complex_type< T > const *x__, int ld1__, int const *idx__, gpu_complex_type< T > *y__, int ld2__, int ngv_loc__)
 
void axpby_gpu_double_complex_double (int nwf__, gpu_complex_type< double > const *alpha__, gpu_complex_type< double > const *x__, int ld1__, gpu_complex_type< double > const *beta__, gpu_complex_type< double > *y__, int ld2__, int ngv_loc__)
 
void axpby_gpu_double_double (int nwf__, double const *alpha__, gpu_complex_type< double > const *x__, int ld1__, double const *beta__, gpu_complex_type< double > *y__, int ld2__, int ngv_loc__)
 
void axpy_scatter_gpu_double_complex_double (int N_unconverged, gpu_complex_type< double > const *alpha__, gpu_complex_type< double > const *x__, int ld1__, int const *idx__, gpu_complex_type< double > *y__, int ld2__, int ngv_loc__)
 
void axpy_scatter_gpu_double_double (int N_unconverged, double const *alpha__, gpu_complex_type< double > const *x__, int ld1__, int const *idx__, gpu_complex_type< double > *y__, int ld2__, int ngv_loc__)
 

Detailed Description

CUDA kernel to compute wave-function residuals on GPUs.

Definition in file residuals_aux.cu.

Function Documentation

◆ compute_residuals_gpu_kernel< double >()

template<>
__global__ void compute_residuals_gpu_kernel< double > ( int const  num_rows_loc__,
double const *  eval__,
acc_complex_double_t const *  hpsi__,
acc_complex_double_t const *  opsi__,
acc_complex_double_t *  res__ 
)

Definition at line 42 of file residuals_aux.cu.

◆ compute_residuals_gpu_kernel< float >()

template<>
__global__ void compute_residuals_gpu_kernel< float > ( int const  num_rows_loc__,
float const *  eval__,
acc_complex_float_t const *  hpsi__,
acc_complex_float_t const *  opsi__,
acc_complex_float_t *  res__ 
)

Definition at line 62 of file residuals_aux.cu.

◆ compute_residuals_gpu_double()

void compute_residuals_gpu_double ( acc_complex_double_t *  hpsi__,
acc_complex_double_t *  opsi__,
acc_complex_double_t *  res__,
int  num_rows_loc__,
int  num_bands__,
double *  eval__ 
)

Definition at line 82 of file residuals_aux.cu.

◆ compute_residuals_gpu_float()

void compute_residuals_gpu_float ( acc_complex_float_t *  hpsi__,
acc_complex_float_t *  opsi__,
acc_complex_float_t *  res__,
int  num_rows_loc__,
int  num_bands__,
float *  eval__ 
)

Definition at line 101 of file residuals_aux.cu.

◆ add_square_sum_gpu_kernel()

template<typename T >
__global__ void add_square_sum_gpu_kernel ( int  num_rows_loc__,
gpu_complex_type< T > const *  wf__,
int  reduced__,
int  mpi_rank__,
T *  result__ 
)

Definition at line 122 of file residuals_aux.cu.

◆ add_square_sum_gpu_double()

void add_square_sum_gpu_double ( acc_complex_double_t *  wf__,
int  num_rows_loc__,
int  nwf__,
int  reduced__,
int  mpi_rank__,
double *  result__ 
)

Definition at line 170 of file residuals_aux.cu.

◆ add_square_sum_gpu_float()

void add_square_sum_gpu_float ( acc_complex_float_t *  wf__,
int  num_rows_loc__,
int  nwf__,
int  reduced__,
int  mpi_rank__,
float *  result__ 
)

Definition at line 184 of file residuals_aux.cu.

◆ inner_diag_local_aux() [1/2]

template<typename T , typename F >
static __device__ std::enable_if_t< std::is_scalar< F >::value, F > inner_diag_local_aux ( gpu_complex_type< T >  z1__,
gpu_complex_type< T >  z2__ 
)
inline static

Definition at line 201 of file residuals_aux.cu.

◆ inner_diag_local_aux() [2/2]

template<typename T , typename F >
static __device__ std::enable_if_t<!std::is_scalar< F >::value, F > inner_diag_local_aux ( gpu_complex_type< T >  z1__,
gpu_complex_type< T >  z2__ 
)
inline static

Overload selected when F is a complex (non-scalar) type, i.e. complex<double> or complex<float>.

Definition at line 209 of file residuals_aux.cu.

◆ inner_diag_local_gpu_kernel()

template<typename T , typename F >
__global__ void inner_diag_local_gpu_kernel ( gpu_complex_type< T > const *  wf1__,
int  ld1__,
gpu_complex_type< T > const *  wf2__,
int  ld2__,
int  ngv_loc__,
int  reduced__,
F *  result__ 
)

Definition at line 216 of file residuals_aux.cu.

◆ inner_diag_local_gpu_double_complex_double()

void inner_diag_local_gpu_double_complex_double ( gpu_complex_type< double > *  wf1__,
int  ld1__,
gpu_complex_type< double > *  wf2__,
int  ld2__,
int  ngv_loc__,
int  nwf__,
gpu_complex_type< double > *  result__ 
)

Definition at line 265 of file residuals_aux.cu.

◆ inner_diag_local_gpu_double_double()

void inner_diag_local_gpu_double_double ( gpu_complex_type< double > *  wf1__,
int  ld1__,
gpu_complex_type< double > *  wf2__,
int  ld2__,
int  ngv_loc__,
int  nwf__,
int  reduced__,
double *  result__ 
)

Definition at line 278 of file residuals_aux.cu.

◆ apply_preconditioner_gpu_kernel< double >()

template<>
__global__ void apply_preconditioner_gpu_kernel< double > ( int const  num_rows_loc__,
double const *  eval__,
double const *  h_diag__,
double const *  o_diag__,
acc_complex_double_t *  res__ 
)

Definition at line 311 of file residuals_aux.cu.

◆ apply_preconditioner_gpu_kernel< float >()

template<>
__global__ void apply_preconditioner_gpu_kernel< float > ( int const  num_rows_loc__,
float const *  eval__,
float const *  h_diag__,
float const *  o_diag__,
acc_complex_float_t *  res__ 
)

Definition at line 329 of file residuals_aux.cu.

◆ apply_preconditioner_gpu_double()

void apply_preconditioner_gpu_double ( acc_complex_double_t *  res__,
int  num_rows_loc__,
int  num_bands__,
double *  eval__,
const double *  h_diag__,
const double *  o_diag__ 
)

Definition at line 347 of file residuals_aux.cu.

◆ apply_preconditioner_gpu_float()

void apply_preconditioner_gpu_float ( acc_complex_float_t *  res__,
int  num_rows_loc__,
int  num_bands__,
float *  eval__,
const float *  h_diag__,
const float *  o_diag__ 
)

Definition at line 360 of file residuals_aux.cu.

◆ make_real_g0_gpu_kernel< double >()

template<>
__global__ void make_real_g0_gpu_kernel< double > ( acc_complex_double_t *  res__,
int  ld__ 
)

Definition at line 378 of file residuals_aux.cu.

◆ make_real_g0_gpu_kernel< float >()

template<>
__global__ void make_real_g0_gpu_kernel< float > ( acc_complex_float_t *  res__,
int  ld__ 
)

Definition at line 388 of file residuals_aux.cu.

◆ make_real_g0_gpu_double()

void make_real_g0_gpu_double ( acc_complex_double_t *  res__,
int  ld__,
int  n__ 
)

Definition at line 398 of file residuals_aux.cu.

◆ make_real_g0_gpu_float()

void make_real_g0_gpu_float ( acc_complex_float_t *  res__,
int  ld__,
int  n__ 
)

Definition at line 406 of file residuals_aux.cu.

◆ axpby_gpu_kernel() [1/2]

template<typename T , typename F >
__global__ void axpby_gpu_kernel ( F const *  beta__,
gpu_complex_type< T > *  y__,
int  ld2__,
int  ngv_loc__ 
)

Definition at line 416 of file residuals_aux.cu.

◆ axpby_gpu_kernel() [2/2]

template<typename T , typename F >
__global__ void axpby_gpu_kernel ( F const *  alpha__,
gpu_complex_type< T > const *  x__,
int  ld1__,
F const *  beta__,
gpu_complex_type< T > *  y__,
int  ld2__,
int  ngv_loc__ 
)

Definition at line 430 of file residuals_aux.cu.

◆ axpy_scatter_gpu_kernel()

template<typename T , typename F >
__global__ void axpy_scatter_gpu_kernel ( F const *  alpha__,
gpu_complex_type< T > const *  x__,
int  ld1__,
int const *  idx__,
gpu_complex_type< T > *  y__,
int  ld2__,
int  ngv_loc__ 
)

Definition at line 455 of file residuals_aux.cu.

◆ axpby_gpu_double_complex_double()

void axpby_gpu_double_complex_double ( int  nwf__,
gpu_complex_type< double > const *  alpha__,
gpu_complex_type< double > const *  x__,
int  ld1__,
gpu_complex_type< double > const *  beta__,
gpu_complex_type< double > *  y__,
int  ld2__,
int  ngv_loc__ 
)

Definition at line 476 of file residuals_aux.cu.

◆ axpby_gpu_double_double()

void axpby_gpu_double_double ( int  nwf__,
double const *  alpha__,
gpu_complex_type< double > const *  x__,
int  ld1__,
double const *  beta__,
gpu_complex_type< double > *  y__,
int  ld2__,
int  ngv_loc__ 
)

Definition at line 492 of file residuals_aux.cu.

◆ axpy_scatter_gpu_double_complex_double()

void axpy_scatter_gpu_double_complex_double ( int  N_unconverged,
gpu_complex_type< double > const *  alpha__,
gpu_complex_type< double > const *  x__,
int  ld1__,
int const *  idx__,
gpu_complex_type< double > *  y__,
int  ld2__,
int  ngv_loc__ 
)

Definition at line 507 of file residuals_aux.cu.

◆ axpy_scatter_gpu_double_double()

void axpy_scatter_gpu_double_double ( int  N_unconverged,
double const *  alpha__,
gpu_complex_type< double > const *  x__,
int  ld1__,
int const *  idx__,
gpu_complex_type< double > *  y__,
int  ld2__,
int  ngv_loc__ 
)

Definition at line 517 of file residuals_aux.cu.