SIRIUS 7.5.0
Electronic structure library and applications
density_rg.cu
Go to the documentation of this file.
1// Copyright (c) 2013-2018 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file density_rg.cu
21 *
22 * \brief CUDA kernel to update density on the regular FFT grid.
23 */
24
27
28using namespace sirius;
29using namespace sirius::acc;
30
31template <typename T>
32__global__ void update_density_rg_1_complex_gpu_kernel(int size__,
33 gpu_complex_type<T> const* psi_rg__,
34 T wt__,
35 T* density_rg__)
36{
37 int ir = blockIdx.x * blockDim.x + threadIdx.x;
38 if (ir < size__) {
39 gpu_complex_type<T> z = psi_rg__[ir];
40 density_rg__[ir] += (z.x * z.x + z.y * z.y) * wt__;
41 }
42}
43
44/* Update one density component from one complex wave-function */
45extern "C" void update_density_rg_1_complex_gpu_double(int size__,
46 acc_complex_double_t const* psi_rg__,
47 double wt__,
48 double* density_rg__)
49{
50 // CUDA_timer t("update_density_rg_1_gpu");
51
52 dim3 grid_t(64);
53 dim3 grid_b(num_blocks(size__, grid_t.x));
54
55 accLaunchKernel((update_density_rg_1_complex_gpu_kernel<double>), dim3(grid_b), dim3(grid_t), 0, 0, size__,
56 psi_rg__, wt__, density_rg__);
57}
58
59extern "C" void update_density_rg_1_complex_gpu_float(int size__,
60 acc_complex_float_t const* psi_rg__,
61 float wt__,
62 float* density_rg__)
63{
64 // CUDA_timer t("update_density_rg_1_gpu");
65
66 dim3 grid_t(64);
67 dim3 grid_b(num_blocks(size__, grid_t.x));
68
69 accLaunchKernel((update_density_rg_1_complex_gpu_kernel<float>), dim3(grid_b), dim3(grid_t), 0, 0, size__,
70 psi_rg__, wt__, density_rg__);
71}
72
73template <typename T>
74__global__ void update_density_rg_1_real_gpu_kernel(int size__,
75 T const* psi_rg__,
76 T wt__,
77 T* density_rg__)
78{
79 int ir = blockIdx.x * blockDim.x + threadIdx.x;
80 if (ir < size__) {
81 T p = psi_rg__[ir];
82 density_rg__[ir] += p * p * wt__;
83 }
84}
85
86/* Update one density component from one real wave-function */
87extern "C" void update_density_rg_1_real_gpu_double(int size__,
88 double const* psi_rg__,
89 double wt__,
90 double* density_rg__)
91{
92 //CUDA_timer t("update_density_rg_1_gpu");
93
94 dim3 grid_t(64);
95 dim3 grid_b(num_blocks(size__, grid_t.x));
96
97 accLaunchKernel((update_density_rg_1_real_gpu_kernel<double>), dim3(grid_b), dim3(grid_t), 0, 0,
98 size__,
99 psi_rg__,
100 wt__,
101 density_rg__
102 );
103}
104
105extern "C" void update_density_rg_1_real_gpu_float(int size__,
106 float const* psi_rg__,
107 float wt__,
108 float* density_rg__)
109{
110 //CUDA_timer t("update_density_rg_1_gpu");
111
112 dim3 grid_t(64);
113 dim3 grid_b(num_blocks(size__, grid_t.x));
114
115 accLaunchKernel((update_density_rg_1_real_gpu_kernel<float>), dim3(grid_b), dim3(grid_t), 0, 0,
116 size__,
117 psi_rg__,
118 wt__,
119 density_rg__
120 );
121}
122
123template <typename T>
124__global__ void update_density_rg_2_gpu_kernel(int size__,
125 gpu_complex_type<T> const* psi_up_rg__,
126 gpu_complex_type<T> const* psi_dn_rg__,
127 T wt__,
128 T* density_x_rg__,
129 T* density_y_rg__);
130
131template <>
132__global__ void update_density_rg_2_gpu_kernel<double>(int size__,
133 acc_complex_double_t const* psi_up_rg__,
134 acc_complex_double_t const* psi_dn_rg__,
135 double wt__,
136 double* density_x_rg__,
137 double* density_y_rg__)
138{
139 int ir = blockIdx.x * blockDim.x + threadIdx.x;
140 if (ir < size__) {
141 acc_complex_double_t z = accCmul(psi_up_rg__[ir], accConj(psi_dn_rg__[ir]));
142 density_x_rg__[ir] += 2 * z.x * wt__;
143 density_y_rg__[ir] -= 2 * z.y * wt__;
144 }
145}
146
147template <>
148__global__ void update_density_rg_2_gpu_kernel<float>(int size__,
149 acc_complex_float_t const* psi_up_rg__,
150 acc_complex_float_t const* psi_dn_rg__,
151 float wt__,
152 float* density_x_rg__,
153 float* density_y_rg__)
154{
155 int ir = blockIdx.x * blockDim.x + threadIdx.x;
156 if (ir < size__) {
157 acc_complex_float_t z = accCmulf(psi_up_rg__[ir], accConjf(psi_dn_rg__[ir]));
158 density_x_rg__[ir] += 2 * z.x * wt__;
159 density_y_rg__[ir] -= 2 * z.y * wt__;
160 }
161}
162
163/* Update off-diagonal density component in non-collinear case */
164extern "C" void update_density_rg_2_gpu_double(int size__,
165 acc_complex_double_t const* psi_up_rg__,
166 acc_complex_double_t const* psi_dn_rg__,
167 double wt__,
168 double* density_x_rg__,
169 double* density_y_rg__)
170{
171 //CUDA_timer t("update_density_rg_1_gpu");
172
173 dim3 grid_t(64);
174 dim3 grid_b(num_blocks(size__, grid_t.x));
175
176 accLaunchKernel((update_density_rg_2_gpu_kernel<double>), dim3(grid_b), dim3(grid_t), 0, 0,
177 size__,
178 psi_up_rg__,
179 psi_dn_rg__,
180 wt__,
181 density_x_rg__,
182 density_y_rg__
183 );
184}
185
186extern "C" void update_density_rg_2_gpu_float(int size__,
187 acc_complex_float_t const* psi_up_rg__,
188 acc_complex_float_t const* psi_dn_rg__,
189 float wt__,
190 float* density_x_rg__,
191 float* density_y_rg__)
192{
193 //CUDA_timer t("update_density_rg_1_gpu");
194
195 dim3 grid_t(64);
196 dim3 grid_b(num_blocks(size__, grid_t.x));
197
198 accLaunchKernel((update_density_rg_2_gpu_kernel<float>), dim3(grid_b), dim3(grid_t), 0, 0,
199 size__,
200 psi_up_rg__,
201 psi_dn_rg__,
202 wt__,
203 density_x_rg__,
204 density_y_rg__
205 );
206}
207
208
Common device functions used by GPU kernels.
Uniform interface to the runtime API of CUDA and ROCm.
Namespace for accelerator-related functions.
Definition: acc.cpp:30
Namespace of the SIRIUS library.
Definition: sirius.f90:5