SIRIUS 7.5.0
Electronic structure library and applications
|
CUDA kernel to compute wave-function residuals on GPUs. More...
Go to the source code of this file.
Functions | |
template<typename T > | |
__global__ void | compute_residuals_gpu_kernel (int const num_rows_loc__, T const *eval__, gpu_complex_type< T > const *hpsi__, gpu_complex_type< T > const *opsi__, gpu_complex_type< T > *res__) |
template<> | |
__global__ void | compute_residuals_gpu_kernel< double > (int const num_rows_loc__, double const *eval__, acc_complex_double_t const *hpsi__, acc_complex_double_t const *opsi__, acc_complex_double_t *res__) |
template<> | |
__global__ void | compute_residuals_gpu_kernel< float > (int const num_rows_loc__, float const *eval__, acc_complex_float_t const *hpsi__, acc_complex_float_t const *opsi__, acc_complex_float_t *res__) |
void | compute_residuals_gpu_double (acc_complex_double_t *hpsi__, acc_complex_double_t *opsi__, acc_complex_double_t *res__, int num_rows_loc__, int num_bands__, double *eval__) |
void | compute_residuals_gpu_float (acc_complex_float_t *hpsi__, acc_complex_float_t *opsi__, acc_complex_float_t *res__, int num_rows_loc__, int num_bands__, float *eval__) |
template<typename T > | |
__global__ void | add_square_sum_gpu_kernel (int num_rows_loc__, gpu_complex_type< T > const *wf__, int reduced__, int mpi_rank__, T *result__) |
void | add_square_sum_gpu_double (acc_complex_double_t *wf__, int num_rows_loc__, int nwf__, int reduced__, int mpi_rank__, double *result__) |
void | add_square_sum_gpu_float (acc_complex_float_t *wf__, int num_rows_loc__, int nwf__, int reduced__, int mpi_rank__, float *result__) |
template<typename T , typename F > | |
static __device__ std::enable_if_t< std::is_scalar< F >::value, F > | inner_diag_local_aux (gpu_complex_type< T > z1__, gpu_complex_type< T > z2__) |
template<typename T , typename F > | |
static __device__ std::enable_if_t<!std::is_scalar< F >::value, F > | inner_diag_local_aux (gpu_complex_type< T > z1__, gpu_complex_type< T > z2__) |
For complex-type F (complex<double> or complex<float>). More... | |
template<typename T , typename F > | |
__global__ void | inner_diag_local_gpu_kernel (gpu_complex_type< T > const *wf1__, int ld1__, gpu_complex_type< T > const *wf2__, int ld2__, int ngv_loc__, int reduced__, F *result__) |
void | inner_diag_local_gpu_double_complex_double (gpu_complex_type< double > *wf1__, int ld1__, gpu_complex_type< double > *wf2__, int ld2__, int ngv_loc__, int nwf__, gpu_complex_type< double > *result__) |
void | inner_diag_local_gpu_double_double (gpu_complex_type< double > *wf1__, int ld1__, gpu_complex_type< double > *wf2__, int ld2__, int ngv_loc__, int nwf__, int reduced__, double *result__) |
template<typename T > | |
__global__ void | apply_preconditioner_gpu_kernel (int const num_rows_loc__, T const *eval__, T const *h_diag__, T const *o_diag__, gpu_complex_type< T > *res__) |
template<> | |
__global__ void | apply_preconditioner_gpu_kernel< double > (int const num_rows_loc__, double const *eval__, double const *h_diag__, double const *o_diag__, acc_complex_double_t *res__) |
template<> | |
__global__ void | apply_preconditioner_gpu_kernel< float > (int const num_rows_loc__, float const *eval__, float const *h_diag__, float const *o_diag__, acc_complex_float_t *res__) |
void | apply_preconditioner_gpu_double (acc_complex_double_t *res__, int num_rows_loc__, int num_bands__, double *eval__, const double *h_diag__, const double *o_diag__) |
void | apply_preconditioner_gpu_float (acc_complex_float_t *res__, int num_rows_loc__, int num_bands__, float *eval__, const float *h_diag__, const float *o_diag__) |
template<typename T > | |
__global__ void | make_real_g0_gpu_kernel (gpu_complex_type< T > *res__, int ld__) |
template<> | |
__global__ void | make_real_g0_gpu_kernel< double > (acc_complex_double_t *res__, int ld__) |
template<> | |
__global__ void | make_real_g0_gpu_kernel< float > (acc_complex_float_t *res__, int ld__) |
void | make_real_g0_gpu_double (acc_complex_double_t *res__, int ld__, int n__) |
void | make_real_g0_gpu_float (acc_complex_float_t *res__, int ld__, int n__) |
template<typename T , typename F > | |
__global__ void | axpby_gpu_kernel (F const *beta__, gpu_complex_type< T > *y__, int ld2__, int ngv_loc__) |
template<typename T , typename F > | |
__global__ void | axpby_gpu_kernel (F const *alpha__, gpu_complex_type< T > const *x__, int ld1__, F const *beta__, gpu_complex_type< T > *y__, int ld2__, int ngv_loc__) |
template<typename T , typename F > | |
__global__ void | axpy_scatter_gpu_kernel (F const *alpha__, gpu_complex_type< T > const *x__, int ld1__, int const *idx__, gpu_complex_type< T > *y__, int ld2__, int ngv_loc__) |
void | axpby_gpu_double_complex_double (int nwf__, gpu_complex_type< double > const *alpha__, gpu_complex_type< double > const *x__, int ld1__, gpu_complex_type< double > const *beta__, gpu_complex_type< double > *y__, int ld2__, int ngv_loc__) |
void | axpby_gpu_double_double (int nwf__, double const *alpha__, gpu_complex_type< double > const *x__, int ld1__, double const *beta__, gpu_complex_type< double > *y__, int ld2__, int ngv_loc__) |
void | axpy_scatter_gpu_double_complex_double (int N_unconverged, gpu_complex_type< double > const *alpha__, gpu_complex_type< double > const *x__, int ld1__, int const *idx__, gpu_complex_type< double > *y__, int ld2__, int ngv_loc__) |
void | axpy_scatter_gpu_double_double (int N_unconverged, double const *alpha__, gpu_complex_type< double > const *x__, int ld1__, int const *idx__, gpu_complex_type< double > *y__, int ld2__, int ngv_loc__) |
CUDA kernel to compute wave-function residuals on GPUs.
Definition in file residuals_aux.cu.
__global__ void compute_residuals_gpu_kernel< double > | ( | int const | num_rows_loc__, |
double const * | eval__, | ||
acc_complex_double_t const * | hpsi__, | ||
acc_complex_double_t const * | opsi__, | ||
acc_complex_double_t * | res__ | ||
) |
Definition at line 42 of file residuals_aux.cu.
__global__ void compute_residuals_gpu_kernel< float > | ( | int const | num_rows_loc__, |
float const * | eval__, | ||
acc_complex_float_t const * | hpsi__, | ||
acc_complex_float_t const * | opsi__, | ||
acc_complex_float_t * | res__ | ||
) |
Definition at line 62 of file residuals_aux.cu.
void compute_residuals_gpu_double | ( | acc_complex_double_t * | hpsi__, |
acc_complex_double_t * | opsi__, | ||
acc_complex_double_t * | res__, | ||
int | num_rows_loc__, | ||
int | num_bands__, | ||
double * | eval__ | ||
) |
Definition at line 82 of file residuals_aux.cu.
void compute_residuals_gpu_float | ( | acc_complex_float_t * | hpsi__, |
acc_complex_float_t * | opsi__, | ||
acc_complex_float_t * | res__, | ||
int | num_rows_loc__, | ||
int | num_bands__, | ||
float * | eval__ | ||
) |
Definition at line 101 of file residuals_aux.cu.
__global__ void add_square_sum_gpu_kernel | ( | int | num_rows_loc__, |
gpu_complex_type< T > const * | wf__, | ||
int | reduced__, | ||
int | mpi_rank__, | ||
T * | result__ | ||
) |
Definition at line 122 of file residuals_aux.cu.
void add_square_sum_gpu_double | ( | acc_complex_double_t * | wf__, |
int | num_rows_loc__, | ||
int | nwf__, | ||
int | reduced__, | ||
int | mpi_rank__, | ||
double * | result__ | ||
) |
Definition at line 170 of file residuals_aux.cu.
void add_square_sum_gpu_float | ( | acc_complex_float_t * | wf__, |
int | num_rows_loc__, | ||
int | nwf__, | ||
int | reduced__, | ||
int | mpi_rank__, | ||
float * | result__ | ||
) |
Definition at line 184 of file residuals_aux.cu.
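static __device__ std::enable_if_t< std::is_scalar< F >::value, F > inner_diag_local_aux | ( | gpu_complex_type< T > | z1__, |
gpu_complex_type< T > | z2__ | ||
) |
inline static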
Definition at line 201 of file residuals_aux.cu.
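static __device__ std::enable_if_t<!std::is_scalar< F >::value, F > inner_diag_local_aux | ( | gpu_complex_type< T > | z1__, |
gpu_complex_type< T > | z2__ | ||
) |
inline static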
For complex-type F (complex<double> or complex<float>).
Definition at line 209 of file residuals_aux.cu.
__global__ void inner_diag_local_gpu_kernel | ( | gpu_complex_type< T > const * | wf1__, |
int | ld1__, | ||
gpu_complex_type< T > const * | wf2__, | ||
int | ld2__, | ||
int | ngv_loc__, | ||
int | reduced__, | ||
F * | result__ | ||
) |
Definition at line 216 of file residuals_aux.cu.
void inner_diag_local_gpu_double_complex_double | ( | gpu_complex_type< double > * | wf1__, |
int | ld1__, | ||
gpu_complex_type< double > * | wf2__, | ||
int | ld2__, | ||
int | ngv_loc__, | ||
int | nwf__, | ||
gpu_complex_type< double > * | result__ | ||
) |
Definition at line 265 of file residuals_aux.cu.
void inner_diag_local_gpu_double_double | ( | gpu_complex_type< double > * | wf1__, |
int | ld1__, | ||
gpu_complex_type< double > * | wf2__, | ||
int | ld2__, | ||
int | ngv_loc__, | ||
int | nwf__, | ||
int | reduced__, | ||
double * | result__ | ||
) |
Definition at line 278 of file residuals_aux.cu.
__global__ void apply_preconditioner_gpu_kernel< double > | ( | int const | num_rows_loc__, |
double const * | eval__, | ||
double const * | h_diag__, | ||
double const * | o_diag__, | ||
acc_complex_double_t * | res__ | ||
) |
Definition at line 311 of file residuals_aux.cu.
__global__ void apply_preconditioner_gpu_kernel< float > | ( | int const | num_rows_loc__, |
float const * | eval__, | ||
float const * | h_diag__, | ||
float const * | o_diag__, | ||
acc_complex_float_t * | res__ | ||
) |
Definition at line 329 of file residuals_aux.cu.
void apply_preconditioner_gpu_double | ( | acc_complex_double_t * | res__, |
int | num_rows_loc__, | ||
int | num_bands__, | ||
double * | eval__, | ||
const double * | h_diag__, | ||
const double * | o_diag__ | ||
) |
Definition at line 347 of file residuals_aux.cu.
void apply_preconditioner_gpu_float | ( | acc_complex_float_t * | res__, |
int | num_rows_loc__, | ||
int | num_bands__, | ||
float * | eval__, | ||
const float * | h_diag__, | ||
const float * | o_diag__ | ||
) |
Definition at line 360 of file residuals_aux.cu.
__global__ void make_real_g0_gpu_kernel< double > | ( | acc_complex_double_t * | res__, |
int | ld__ | ||
) |
Definition at line 378 of file residuals_aux.cu.
__global__ void make_real_g0_gpu_kernel< float > | ( | acc_complex_float_t * | res__, |
int | ld__ | ||
) |
Definition at line 388 of file residuals_aux.cu.
void make_real_g0_gpu_double | ( | acc_complex_double_t * | res__, |
int | ld__, | ||
int | n__ | ||
) |
Definition at line 398 of file residuals_aux.cu.
void make_real_g0_gpu_float | ( | acc_complex_float_t * | res__, |
int | ld__, | ||
int | n__ | ||
) |
Definition at line 406 of file residuals_aux.cu.
__global__ void axpby_gpu_kernel | ( | F const * | beta__, |
gpu_complex_type< T > * | y__, | ||
int | ld2__, | ||
int | ngv_loc__ | ||
) |
Definition at line 416 of file residuals_aux.cu.
__global__ void axpby_gpu_kernel | ( | F const * | alpha__, |
gpu_complex_type< T > const * | x__, | ||
int | ld1__, | ||
F const * | beta__, | ||
gpu_complex_type< T > * | y__, | ||
int | ld2__, | ||
int | ngv_loc__ | ||
) |
Definition at line 430 of file residuals_aux.cu.
__global__ void axpy_scatter_gpu_kernel | ( | F const * | alpha__, |
gpu_complex_type< T > const * | x__, | ||
int | ld1__, | ||
int const * | idx__, | ||
gpu_complex_type< T > * | y__, | ||
int | ld2__, | ||
int | ngv_loc__ | ||
) |
Definition at line 455 of file residuals_aux.cu.
void axpby_gpu_double_complex_double | ( | int | nwf__, |
gpu_complex_type< double > const * | alpha__, | ||
gpu_complex_type< double > const * | x__, | ||
int | ld1__, | ||
gpu_complex_type< double > const * | beta__, | ||
gpu_complex_type< double > * | y__, | ||
int | ld2__, | ||
int | ngv_loc__ | ||
) |
Definition at line 476 of file residuals_aux.cu.
void axpby_gpu_double_double | ( | int | nwf__, |
double const * | alpha__, | ||
gpu_complex_type< double > const * | x__, | ||
int | ld1__, | ||
double const * | beta__, | ||
gpu_complex_type< double > * | y__, | ||
int | ld2__, | ||
int | ngv_loc__ | ||
) |
Definition at line 492 of file residuals_aux.cu.
void axpy_scatter_gpu_double_complex_double | ( | int | N_unconverged, |
gpu_complex_type< double > const * | alpha__, | ||
gpu_complex_type< double > const * | x__, | ||
int | ld1__, | ||
int const * | idx__, | ||
gpu_complex_type< double > * | y__, | ||
int | ld2__, | ||
int | ngv_loc__ | ||
) |
Definition at line 507 of file residuals_aux.cu.
void axpy_scatter_gpu_double_double | ( | int | N_unconverged, |
double const * | alpha__, | ||
gpu_complex_type< double > const * | x__, | ||
int | ld1__, | ||
int const * | idx__, | ||
gpu_complex_type< double > * | y__, | ||
int | ld2__, | ||
int | ngv_loc__ | ||
) |
Definition at line 517 of file residuals_aux.cu.