|
SIRIUS 7.5.0
Electronic structure library and applications
|
CUDA kernel to compute wave-function residuals on GPUs.
Go to the source code of this file.
Functions | |
| template<typename T > | |
| __global__ void | compute_residuals_gpu_kernel (int const num_rows_loc__, T const *eval__, gpu_complex_type< T > const *hpsi__, gpu_complex_type< T > const *opsi__, gpu_complex_type< T > *res__) |
| template<> | |
| __global__ void | compute_residuals_gpu_kernel< double > (int const num_rows_loc__, double const *eval__, acc_complex_double_t const *hpsi__, acc_complex_double_t const *opsi__, acc_complex_double_t *res__) |
| template<> | |
| __global__ void | compute_residuals_gpu_kernel< float > (int const num_rows_loc__, float const *eval__, acc_complex_float_t const *hpsi__, acc_complex_float_t const *opsi__, acc_complex_float_t *res__) |
| void | compute_residuals_gpu_double (acc_complex_double_t *hpsi__, acc_complex_double_t *opsi__, acc_complex_double_t *res__, int num_rows_loc__, int num_bands__, double *eval__) |
| void | compute_residuals_gpu_float (acc_complex_float_t *hpsi__, acc_complex_float_t *opsi__, acc_complex_float_t *res__, int num_rows_loc__, int num_bands__, float *eval__) |
| template<typename T > | |
| __global__ void | add_square_sum_gpu_kernel (int num_rows_loc__, gpu_complex_type< T > const *wf__, int reduced__, int mpi_rank__, T *result__) |
| void | add_square_sum_gpu_double (acc_complex_double_t *wf__, int num_rows_loc__, int nwf__, int reduced__, int mpi_rank__, double *result__) |
| void | add_square_sum_gpu_float (acc_complex_float_t *wf__, int num_rows_loc__, int nwf__, int reduced__, int mpi_rank__, float *result__) |
| template<typename T , typename F > | |
| static __device__ std::enable_if_t< std::is_scalar< F >::value, F > | inner_diag_local_aux (gpu_complex_type< T > z1__, gpu_complex_type< T > z2__) |
| template<typename T , typename F > | |
| static __device__ std::enable_if_t<!std::is_scalar< F >::value, F > | inner_diag_local_aux (gpu_complex_type< T > z1__, gpu_complex_type< T > z2__) |
| For complex-type F (complex<double> or complex<float>). | |
| template<typename T , typename F > | |
| __global__ void | inner_diag_local_gpu_kernel (gpu_complex_type< T > const *wf1__, int ld1__, gpu_complex_type< T > const *wf2__, int ld2__, int ngv_loc__, int reduced__, F *result__) |
| void | inner_diag_local_gpu_double_complex_double (gpu_complex_type< double > *wf1__, int ld1__, gpu_complex_type< double > *wf2__, int ld2__, int ngv_loc__, int nwf__, gpu_complex_type< double > *result__) |
| void | inner_diag_local_gpu_double_double (gpu_complex_type< double > *wf1__, int ld1__, gpu_complex_type< double > *wf2__, int ld2__, int ngv_loc__, int nwf__, int reduced__, double *result__) |
| template<typename T > | |
| __global__ void | apply_preconditioner_gpu_kernel (int const num_rows_loc__, T const *eval__, T const *h_diag__, T const *o_diag__, gpu_complex_type< T > *res__) |
| template<> | |
| __global__ void | apply_preconditioner_gpu_kernel< double > (int const num_rows_loc__, double const *eval__, double const *h_diag__, double const *o_diag__, acc_complex_double_t *res__) |
| template<> | |
| __global__ void | apply_preconditioner_gpu_kernel< float > (int const num_rows_loc__, float const *eval__, float const *h_diag__, float const *o_diag__, acc_complex_float_t *res__) |
| void | apply_preconditioner_gpu_double (acc_complex_double_t *res__, int num_rows_loc__, int num_bands__, double *eval__, const double *h_diag__, const double *o_diag__) |
| void | apply_preconditioner_gpu_float (acc_complex_float_t *res__, int num_rows_loc__, int num_bands__, float *eval__, const float *h_diag__, const float *o_diag__) |
| template<typename T > | |
| __global__ void | make_real_g0_gpu_kernel (gpu_complex_type< T > *res__, int ld__) |
| template<> | |
| __global__ void | make_real_g0_gpu_kernel< double > (acc_complex_double_t *res__, int ld__) |
| template<> | |
| __global__ void | make_real_g0_gpu_kernel< float > (acc_complex_float_t *res__, int ld__) |
| void | make_real_g0_gpu_double (acc_complex_double_t *res__, int ld__, int n__) |
| void | make_real_g0_gpu_float (acc_complex_float_t *res__, int ld__, int n__) |
| template<typename T , typename F > | |
| __global__ void | axpby_gpu_kernel (F const *beta__, gpu_complex_type< T > *y__, int ld2__, int ngv_loc__) |
| template<typename T , typename F > | |
| __global__ void | axpby_gpu_kernel (F const *alpha__, gpu_complex_type< T > const *x__, int ld1__, F const *beta__, gpu_complex_type< T > *y__, int ld2__, int ngv_loc__) |
| template<typename T , typename F > | |
| __global__ void | axpy_scatter_gpu_kernel (F const *alpha__, gpu_complex_type< T > const *x__, int ld1__, int const *idx__, gpu_complex_type< T > *y__, int ld2__, int ngv_loc__) |
| void | axpby_gpu_double_complex_double (int nwf__, gpu_complex_type< double > const *alpha__, gpu_complex_type< double > const *x__, int ld1__, gpu_complex_type< double > const *beta__, gpu_complex_type< double > *y__, int ld2__, int ngv_loc__) |
| void | axpby_gpu_double_double (int nwf__, double const *alpha__, gpu_complex_type< double > const *x__, int ld1__, double const *beta__, gpu_complex_type< double > *y__, int ld2__, int ngv_loc__) |
| void | axpy_scatter_gpu_double_complex_double (int N_unconverged, gpu_complex_type< double > const *alpha__, gpu_complex_type< double > const *x__, int ld1__, int const *idx__, gpu_complex_type< double > *y__, int ld2__, int ngv_loc__) |
| void | axpy_scatter_gpu_double_double (int N_unconverged, double const *alpha__, gpu_complex_type< double > const *x__, int ld1__, int const *idx__, gpu_complex_type< double > *y__, int ld2__, int ngv_loc__) |
CUDA kernel to compute wave-function residuals on GPUs.
Definition in file residuals_aux.cu.
| __global__ void compute_residuals_gpu_kernel< double > | ( | int const | num_rows_loc__, |
| double const * | eval__, | ||
| acc_complex_double_t const * | hpsi__, | ||
| acc_complex_double_t const * | opsi__, | ||
| acc_complex_double_t * | res__ | ||
| ) |
Definition at line 42 of file residuals_aux.cu.
| __global__ void compute_residuals_gpu_kernel< float > | ( | int const | num_rows_loc__, |
| float const * | eval__, | ||
| acc_complex_float_t const * | hpsi__, | ||
| acc_complex_float_t const * | opsi__, | ||
| acc_complex_float_t * | res__ | ||
| ) |
Definition at line 62 of file residuals_aux.cu.
| void compute_residuals_gpu_double | ( | acc_complex_double_t * | hpsi__, |
| acc_complex_double_t * | opsi__, | ||
| acc_complex_double_t * | res__, | ||
| int | num_rows_loc__, | ||
| int | num_bands__, | ||
| double * | eval__ | ||
| ) |
Definition at line 82 of file residuals_aux.cu.
| void compute_residuals_gpu_float | ( | acc_complex_float_t * | hpsi__, |
| acc_complex_float_t * | opsi__, | ||
| acc_complex_float_t * | res__, | ||
| int | num_rows_loc__, | ||
| int | num_bands__, | ||
| float * | eval__ | ||
| ) |
Definition at line 101 of file residuals_aux.cu.
| __global__ void add_square_sum_gpu_kernel | ( | int | num_rows_loc__, |
| gpu_complex_type< T > const * | wf__, | ||
| int | reduced__, | ||
| int | mpi_rank__, | ||
| T * | result__ | ||
| ) |
Definition at line 122 of file residuals_aux.cu.
| void add_square_sum_gpu_double | ( | acc_complex_double_t * | wf__, |
| int | num_rows_loc__, | ||
| int | nwf__, | ||
| int | reduced__, | ||
| int | mpi_rank__, | ||
| double * | result__ | ||
| ) |
Definition at line 170 of file residuals_aux.cu.
| void add_square_sum_gpu_float | ( | acc_complex_float_t * | wf__, |
| int | num_rows_loc__, | ||
| int | nwf__, | ||
| int | reduced__, | ||
| int | mpi_rank__, | ||
| float * | result__ | ||
| ) |
Definition at line 184 of file residuals_aux.cu.
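| static __device__ std::enable_if_t< std::is_scalar< F >::value, F > inner_diag_local_aux | ( | gpu_complex_type< T > | z1__, |
| gpu_complex_type< T > | z2__ | ||
| ) |
inline static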
Definition at line 201 of file residuals_aux.cu.
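| static __device__ std::enable_if_t<!std::is_scalar< F >::value, F > inner_diag_local_aux | ( | gpu_complex_type< T > | z1__, |
| gpu_complex_type< T > | z2__ | ||
| ) |
inline static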
For complex-type F (complex<double> or complex<float>).
Definition at line 209 of file residuals_aux.cu.
| __global__ void inner_diag_local_gpu_kernel | ( | gpu_complex_type< T > const * | wf1__, |
| int | ld1__, | ||
| gpu_complex_type< T > const * | wf2__, | ||
| int | ld2__, | ||
| int | ngv_loc__, | ||
| int | reduced__, | ||
| F * | result__ | ||
| ) |
Definition at line 216 of file residuals_aux.cu.
| void inner_diag_local_gpu_double_complex_double | ( | gpu_complex_type< double > * | wf1__, |
| int | ld1__, | ||
| gpu_complex_type< double > * | wf2__, | ||
| int | ld2__, | ||
| int | ngv_loc__, | ||
| int | nwf__, | ||
| gpu_complex_type< double > * | result__ | ||
| ) |
Definition at line 265 of file residuals_aux.cu.
| void inner_diag_local_gpu_double_double | ( | gpu_complex_type< double > * | wf1__, |
| int | ld1__, | ||
| gpu_complex_type< double > * | wf2__, | ||
| int | ld2__, | ||
| int | ngv_loc__, | ||
| int | nwf__, | ||
| int | reduced__, | ||
| double * | result__ | ||
| ) |
Definition at line 278 of file residuals_aux.cu.
| __global__ void apply_preconditioner_gpu_kernel< double > | ( | int const | num_rows_loc__, |
| double const * | eval__, | ||
| double const * | h_diag__, | ||
| double const * | o_diag__, | ||
| acc_complex_double_t * | res__ | ||
| ) |
Definition at line 311 of file residuals_aux.cu.
| __global__ void apply_preconditioner_gpu_kernel< float > | ( | int const | num_rows_loc__, |
| float const * | eval__, | ||
| float const * | h_diag__, | ||
| float const * | o_diag__, | ||
| acc_complex_float_t * | res__ | ||
| ) |
Definition at line 329 of file residuals_aux.cu.
| void apply_preconditioner_gpu_double | ( | acc_complex_double_t * | res__, |
| int | num_rows_loc__, | ||
| int | num_bands__, | ||
| double * | eval__, | ||
| const double * | h_diag__, | ||
| const double * | o_diag__ | ||
| ) |
Definition at line 347 of file residuals_aux.cu.
| void apply_preconditioner_gpu_float | ( | acc_complex_float_t * | res__, |
| int | num_rows_loc__, | ||
| int | num_bands__, | ||
| float * | eval__, | ||
| const float * | h_diag__, | ||
| const float * | o_diag__ | ||
| ) |
Definition at line 360 of file residuals_aux.cu.
| __global__ void make_real_g0_gpu_kernel< double > | ( | acc_complex_double_t * | res__, |
| int | ld__ | ||
| ) |
Definition at line 378 of file residuals_aux.cu.
| __global__ void make_real_g0_gpu_kernel< float > | ( | acc_complex_float_t * | res__, |
| int | ld__ | ||
| ) |
Definition at line 388 of file residuals_aux.cu.
| void make_real_g0_gpu_double | ( | acc_complex_double_t * | res__, |
| int | ld__, | ||
| int | n__ | ||
| ) |
Definition at line 398 of file residuals_aux.cu.
| void make_real_g0_gpu_float | ( | acc_complex_float_t * | res__, |
| int | ld__, | ||
| int | n__ | ||
| ) |
Definition at line 406 of file residuals_aux.cu.
| __global__ void axpby_gpu_kernel | ( | F const * | beta__, |
| gpu_complex_type< T > * | y__, | ||
| int | ld2__, | ||
| int | ngv_loc__ | ||
| ) |
Definition at line 416 of file residuals_aux.cu.
| __global__ void axpby_gpu_kernel | ( | F const * | alpha__, |
| gpu_complex_type< T > const * | x__, | ||
| int | ld1__, | ||
| F const * | beta__, | ||
| gpu_complex_type< T > * | y__, | ||
| int | ld2__, | ||
| int | ngv_loc__ | ||
| ) |
Definition at line 430 of file residuals_aux.cu.
| __global__ void axpy_scatter_gpu_kernel | ( | F const * | alpha__, |
| gpu_complex_type< T > const * | x__, | ||
| int | ld1__, | ||
| int const * | idx__, | ||
| gpu_complex_type< T > * | y__, | ||
| int | ld2__, | ||
| int | ngv_loc__ | ||
| ) |
Definition at line 455 of file residuals_aux.cu.
| void axpby_gpu_double_complex_double | ( | int | nwf__, |
| gpu_complex_type< double > const * | alpha__, | ||
| gpu_complex_type< double > const * | x__, | ||
| int | ld1__, | ||
| gpu_complex_type< double > const * | beta__, | ||
| gpu_complex_type< double > * | y__, | ||
| int | ld2__, | ||
| int | ngv_loc__ | ||
| ) |
Definition at line 476 of file residuals_aux.cu.
| void axpby_gpu_double_double | ( | int | nwf__, |
| double const * | alpha__, | ||
| gpu_complex_type< double > const * | x__, | ||
| int | ld1__, | ||
| double const * | beta__, | ||
| gpu_complex_type< double > * | y__, | ||
| int | ld2__, | ||
| int | ngv_loc__ | ||
| ) |
Definition at line 492 of file residuals_aux.cu.
| void axpy_scatter_gpu_double_complex_double | ( | int | N_unconverged, |
| gpu_complex_type< double > const * | alpha__, | ||
| gpu_complex_type< double > const * | x__, | ||
| int | ld1__, | ||
| int const * | idx__, | ||
| gpu_complex_type< double > * | y__, | ||
| int | ld2__, | ||
| int | ngv_loc__ | ||
| ) |
Definition at line 507 of file residuals_aux.cu.
| void axpy_scatter_gpu_double_double | ( | int | N_unconverged, |
| double const * | alpha__, | ||
| gpu_complex_type< double > const * | x__, | ||
| int | ld1__, | ||
| int const * | idx__, | ||
| gpu_complex_type< double > * | y__, | ||
| int | ld2__, | ||
| int | ngv_loc__ | ||
| ) |
Definition at line 517 of file residuals_aux.cu.