SIRIUS 7.5.0
Electronic structure library and applications
augmentation_operator.cu
// Copyright (c) 2013-2019 Anton Kozhevnikov, Thomas Schulthess
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
// the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
// following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
// and the following disclaimer in the documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

/** \file augmentation_operator.cu
 *
 *  \brief CUDA kernels to generate the augmentation operator and its derivative.
 */

#include "core/acc/acc.hpp"
#include "core/acc/acc_common.hpp"
#include "core/acc/acc_runtime.hpp"

using namespace sirius;
using namespace sirius::acc;

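/* A sketch of the quantity accumulated by the kernel below, assuming fourpi_omega__ = 4\pi/\Omega,
 * zilm__[lm] = i^{\ell(lm)} and gc__ holding Gaunt coefficients of real spherical harmonics:
 * \f[
 *   Q_{\xi\xi'}({\bf G}) = \frac{4\pi}{\Omega} \sum_{\ell m} \overline{i^{\ell}}\,
 *     R_{\ell m}(\hat{\bf G})\, \langle j_{\ell}(Gr) \rangle_{\xi\xi'}\,
 *     \langle R_{\ell_1 m_1} | R_{\ell m} | R_{\ell_2 m_2} \rangle
 * \f]
 * with R_{\ell m}(\hat{\bf G}) taken from gvec_rlm__ and the radial integrals
 * \langle j_{\ell}(Gr) \rangle tabulated per G-shell in ri_values__; the real and imaginary parts
 * are written to q_pw__ in interleaved form.
 */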
__global__ void aug_op_pw_coeffs_gpu_kernel(int ngvec__, int const* gvec_shell__, int const* idx__, int idxmax__,
                                            acc_complex_double_t const* zilm__, int const* l_by_lm__, int lmmax__,
                                            double const* gc__, int ld0__, int ld1__,
                                            double const* gvec_rlm__, int ld2__,
                                            double const* ri_values__, int ld3__, int ld4__,
                                            double* q_pw__, int ld5__, double fourpi_omega__)
{
    int igloc = blockDim.x * blockIdx.x + threadIdx.x;
    int idx12 = blockIdx.y;

    if (igloc < ngvec__) {
        /* read the G-shell index only for in-range G-vectors to avoid an out-of-bounds access */
        int idxsh   = gvec_shell__[igloc];
        int lm1     = idx__[array2D_offset(0, idx12, 3)];
        int lm2     = idx__[array2D_offset(1, idx12, 3)];
        int idxrf12 = idx__[array2D_offset(2, idx12, 3)];

        acc_complex_double_t z = make_accDoubleComplex(0, 0);
        for (int lm = 0; lm < lmmax__; lm++) {
            double d = gvec_rlm__[array2D_offset(lm, igloc, ld2__)] *
                       ri_values__[array3D_offset(idxrf12, l_by_lm__[lm], idxsh, ld3__, ld4__)] *
                       gc__[array3D_offset(lm, lm2, lm1, ld0__, ld1__)];
            z.x += d * zilm__[lm].x;
            z.y -= d * zilm__[lm].y;
        }
        q_pw__[array2D_offset(idx12, 2 * igloc, ld5__)]     = z.x * fourpi_omega__;
        q_pw__[array2D_offset(idx12, 2 * igloc + 1, ld5__)] = z.y * fourpi_omega__;
    }
}

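/* Both host wrappers below launch 32 threads per block along x (one thread per G-vector) and one
 * entry of idx__, i.e. one (lm1, lm2, idxrf12) combination, per block along y. A minimal sketch of
 * the grid-size helper, assuming the usual ceiling division provided by the acc headers:
 *
 *   inline int num_blocks(int length, int block_size)
 *   {
 *       return (length + block_size - 1) / block_size;
 *   }
 */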
extern "C" void aug_op_pw_coeffs_gpu(int ngvec__, int const* gvec_shell__, int const* idx__, int idxmax__,
                                     acc_complex_double_t const* zilm__, int const* l_by_lm__, int lmmax__,
                                     double const* gc__, int ld0__, int ld1__,
                                     double const* gvec_rlm__, int ld2__,
                                     double const* ri_values__, int ld3__, int ld4__,
                                     double* q_pw__, int ld5__, double fourpi_omega__)
{
    dim3 grid_t(32);
    dim3 grid_b(num_blocks(ngvec__, grid_t.x), idxmax__);

    accLaunchKernel((aug_op_pw_coeffs_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0,
            ngvec__, gvec_shell__, idx__, idxmax__, zilm__, l_by_lm__, lmmax__, gc__, ld0__, ld1__, gvec_rlm__, ld2__,
            ri_values__, ld3__, ld4__, q_pw__, ld5__, fourpi_omega__);
}

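/* The derivative kernel below applies the product rule to the expression above: for a Cartesian
 * component \nu the gradient acts on the real spherical harmonics (rlm_dg__) and on the radial
 * integrals (ri_dg_values__, contracted with the Cartesian component from gvec_cart__). A sketch,
 * assuming that this contraction yields the partial derivative of \langle j_{\ell}(Gr) \rangle:
 * \f[
 *   \frac{\partial Q_{\xi\xi'}({\bf G})}{\partial G_{\nu}} = 4\pi \sum_{\ell m} \overline{i^{\ell}}
 *     \left[ \frac{\partial R_{\ell m}(\hat{\bf G})}{\partial G_{\nu}}\,
 *            \langle j_{\ell}(Gr) \rangle_{\xi\xi'}
 *          + R_{\ell m}(\hat{\bf G})\,
 *            \frac{\partial \langle j_{\ell}(Gr) \rangle_{\xi\xi'}}{\partial G_{\nu}} \right]
 *     \langle R_{\ell_1 m_1} | R_{\ell m} | R_{\ell_2 m_2} \rangle
 * \f]
 */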
__global__ void aug_op_pw_coeffs_deriv_gpu_kernel(int ngvec__, int const* gvec_shell__, double const* gvec_cart__,
                                                  int const* idx__, int idxmax__,
                                                  double const* gc__, int ld0__, int ld1__,
                                                  double const* rlm__, double const* rlm_dg__, int ld2__,
                                                  double const* ri_values__, double const* ri_dg_values__, int ld3__,
                                                  int ld4__, double* q_pw__, int ld5__, double fourpi__, int nu__,
                                                  int lmax_q__)
{
    int igloc = blockDim.x * blockIdx.x + threadIdx.x;
    int idx12 = blockIdx.y;

    if (igloc < ngvec__) {
        /* read the G-shell index only for in-range G-vectors to avoid an out-of-bounds access */
        int idxsh     = gvec_shell__[igloc];
        int lm1       = idx__[array2D_offset(0, idx12, 3)];
        int lm2       = idx__[array2D_offset(1, idx12, 3)];
        int idxrf12   = idx__[array2D_offset(2, idx12, 3)];
        double gvc_nu = gvec_cart__[array2D_offset(nu__, igloc, 3)];

        acc_complex_double_t z     = make_accDoubleComplex(0, 0);
        acc_complex_double_t phase = make_accDoubleComplex(1, 0);
        int lm = 0;
        for (int l = 0; l <= lmax_q__; l++) {
            double d1 = 0;
            double d2 = 0;
            for (int m = -l; m <= l; m++, lm++) {
                double gc = gc__[array3D_offset(lm, lm2, lm1, ld0__, ld1__)];
                d1 += rlm_dg__[array3D_offset(lm, nu__, igloc, ld2__, 3)] * gc;
                d2 += rlm__[array2D_offset(lm, igloc, ld2__)] * gc;
            }
            double d = d1 * ri_values__[array3D_offset(l, idxrf12, idxsh, ld3__, ld4__)] +
                       d2 * ri_dg_values__[array3D_offset(l, idxrf12, idxsh, ld3__, ld4__)] * gvc_nu;
            z.x += d * phase.x;
            z.y -= d * phase.y;
            /* i^l */
            phase = accCmul(phase, make_accDoubleComplex(0, 1));
        }
        q_pw__[array2D_offset(idx12, 2 * igloc, ld5__)]     = z.x * fourpi__;
        q_pw__[array2D_offset(idx12, 2 * igloc + 1, ld5__)] = z.y * fourpi__;
    }
}

extern "C" void aug_op_pw_coeffs_deriv_gpu(int ngvec__, int const* gvec_shell__, double const* gvec_cart__,
                                           int const* idx__, int idxmax__,
                                           double const* gc__, int ld0__, int ld1__,
                                           double const* rlm__, double const* rlm_dg__, int ld2__,
                                           double const* ri_values__, double const* ri_dg_values__, int ld3__, int ld4__,
                                           double* q_pw__, int ld5__, double fourpi__, int nu__, int lmax_q__)
{
    dim3 grid_t(32);
    dim3 grid_b(num_blocks(ngvec__, grid_t.x), idxmax__);

    accLaunchKernel((aug_op_pw_coeffs_deriv_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0,
            ngvec__, gvec_shell__, gvec_cart__, idx__, idxmax__, gc__, ld0__, ld1__,
            rlm__, rlm_dg__, ld2__, ri_values__, ri_dg_values__, ld3__, ld4__, q_pw__, ld5__, fourpi__, nu__, lmax_q__);
}
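Throughout the kernels, flat device arrays are addressed with array2D_offset() and array3D_offset() from the acc headers. A minimal sketch of the assumed column-major addressing (leftmost index fastest), for reference only; the helper names carry a _sketch suffix here because the actual definitions live in the acc headers and may differ in detail:

#include <cstddef>

/* offset of element (i0, i1) in a 2D array with leading dimension ld0 */
__device__ __forceinline__ std::size_t array2D_offset_sketch(int i0, int i1, int ld0)
{
    return i0 + i1 * static_cast<std::size_t>(ld0);
}

/* offset of element (i0, i1, i2) in a 3D array with leading dimensions ld0 and ld1 */
__device__ __forceinline__ std::size_t array3D_offset_sketch(int i0, int i1, int i2, int ld0, int ld1)
{
    return i0 + static_cast<std::size_t>(ld0) * (i1 + static_cast<std::size_t>(ld1) * i2);
}

Under this layout, for example, ri_values__[array3D_offset(idxrf12, l, idxsh, ld3__, ld4__)] selects the radial integral of radial-function pair idxrf12 and angular momentum l on G-shell idxsh.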