SIRIUS 7.5.0
Electronic structure library and applications
sum_q_pw_dm_pw.cu
Go to the documentation of this file.
1// Copyright (c) 2013-2018 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file sum_q_pw_dm_pw.cu
21 *
22 * \brief CUDA kernel to perform a summation over xi,xi' indices for the charge density augmentation.
23 */
24
27
28#ifdef SIRIUS_CUDA
30#endif
31
32using namespace sirius;
33using namespace sirius::acc;
34
35__global__ void
36sum_q_pw_dm_pw_gpu_kernel(int nbf__, double const* q_pw__, int ldq__, double const* dm_pw__, int ldd__,
37 double const* sym_weight__, acc_complex_double_t* rho_pw__)
38{
39 ACC_DYNAMIC_SHARED( char, sdata_ptr)
40 double* rho_re = (double*)&sdata_ptr[0];
41 double* rho_im = (double*)&sdata_ptr[sizeof(double) * blockDim.x];
42
43 int igloc = blockIdx.x;
44
45 rho_re[threadIdx.x] = 0;
46 rho_im[threadIdx.x] = 0;
47
48 int ld = nbf__ * (nbf__ + 1) / 2;
49
50 int N = num_blocks(ld, blockDim.x);
51
52 for (int n = 0; n < N; n++) {
53 int i = n * blockDim.x + threadIdx.x;
54 if (i < ld) {
55 double qx = q_pw__[array2D_offset(i, 2 * igloc, ldq__)];
56 double qy = q_pw__[array2D_offset(i, 2 * igloc + 1, ldq__)];
57 double dx = dm_pw__[array2D_offset(i, 2 * igloc, ldd__)];
58 double dy = dm_pw__[array2D_offset(i, 2 * igloc + 1, ldd__)];
59
60 rho_re[threadIdx.x] += sym_weight__[i] * (dx * qx - dy * qy);
61 rho_im[threadIdx.x] += sym_weight__[i] * (dy * qx + dx * qy);
62 }
63 }
64 __syncthreads();
65
66 for (int s = 1; s < blockDim.x; s *= 2) {
67 if (threadIdx.x % (2 * s) == 0) {
68 rho_re[threadIdx.x] = rho_re[threadIdx.x] + rho_re[threadIdx.x + s];
69 rho_im[threadIdx.x] = rho_im[threadIdx.x] + rho_im[threadIdx.x + s];
70 }
71 __syncthreads();
72 }
73 if (threadIdx.x == 0) {
74 rho_pw__[igloc] = accCadd(rho_pw__[igloc], make_accDoubleComplex(rho_re[0], rho_im[0]));
75 }
76}
77
78extern "C" void sum_q_pw_dm_pw_gpu(int num_gvec_loc__, int nbf__, double const* q_pw__, int ldq__,
79 double const* dm_pw__, int ldd__, double const* sym_weight__,
80 acc_complex_double_t* rho_pw__, int stream_id__)
81{
82#ifdef SIRIUS_CUDA
83 CUDA_timer t("sum_q_pw_dm_pw_gpu");
84#endif
85
86 acc_stream_t stream = (acc_stream_t)acc::stream(stream_id(stream_id__));
87
88 dim3 grid_t(64);
89 dim3 grid_b(num_gvec_loc__);
90
91 accLaunchKernel((sum_q_pw_dm_pw_gpu_kernel), dim3(grid_b), dim3(grid_t), 2 * grid_t.x * sizeof(double), stream,
92 nbf__, q_pw__, ldq__, dm_pw__, ldd__, sym_weight__, rho_pw__);
93}
94
95
Common device functions used by GPU kernels.
Uniform interface to the runtime API of CUDA and ROCm.
Helper class to wrap stream id (integer number).
Definition: acc.hpp:132
Timer for CUDA kernels.
Namespace for accelerator-related functions.
Definition: acc.cpp:30
acc_stream_t stream(stream_id sid__)
Return a single device stream.
Definition: acc.hpp:202
Namespace of the SIRIUS library.
Definition: sirius.f90:5