SIRIUS 7.5.0
Electronic structure library and applications
generate_dm_pw.cu
Go to the documentation of this file.
1// Copyright (c) 2013-2018 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file generate_dm_pw.cu
21 *
22 * \brief CUDA kernel to generate a product of phase-factors and density matrix.
23 */
24
27#include "core/acc/acc_blas.hpp"
28
29using namespace sirius;
30using namespace sirius::acc;
31
/** \brief Device kernel that fills a table of complex-conjugated phase factors.
 *
 *  Each thread handles one (local G-vector, atom) pair. The G-vector index comes from the x-dimension
 *  of the launch (blockIdx.x * blockDim.x + threadIdx.x) and the atom index from blockIdx.y. For that
 *  pair the thread evaluates p = twopi * (ax * gvx + ay * gvy + az * gvz) and stores (cos(p), -sin(p)).
 *
 *  \param [in]  num_gvec_loc__   Number of local G-vectors; threads whose index is not below it do nothing.
 *  \param [in]  num_atoms__      Number of atoms; used as the leading dimension when indexing atom_pos__.
 *  \param [in]  atom_pos__       Atom coordinates; component c of atom ia is read at
 *                                array2D_offset(ia, c, num_atoms__). Presumably fractional coordinates
 *                                (TODO confirm against the caller).
 *  \param [in]  gvx__            First integer component of each local G-vector.
 *  \param [in]  gvy__            Second integer component of each local G-vector.
 *  \param [in]  gvz__            Third integer component of each local G-vector.
 *  \param [out] phase_factors__  Output table; element (igloc, ia) is written at
 *                                array2D_offset(igloc, ia, num_gvec_loc__).
 */
__global__ void generate_phase_factors_conj_gpu_kernel
(
    int                   num_gvec_loc__,
    int                   num_atoms__,
    double const*         atom_pos__,
    int const*            gvx__,
    int const*            gvy__,
    int const*            gvz__,
    acc_complex_double_t* phase_factors__
)
{
    int const ig_local = blockIdx.x * blockDim.x + threadIdx.x;

    /* the last block along x may be only partially populated */
    if (ig_local >= num_gvec_loc__) {
        return;
    }

    int const atom_idx = blockIdx.y;

    /* position of the atom selected by the y-index of the block */
    double const pos_x = atom_pos__[array2D_offset(atom_idx, 0, num_atoms__)];
    double const pos_y = atom_pos__[array2D_offset(atom_idx, 1, num_atoms__)];
    double const pos_z = atom_pos__[array2D_offset(atom_idx, 2, num_atoms__)];

    /* integer components of the G-vector handled by this thread */
    int const g_x = gvx__[ig_local];
    int const g_y = gvy__[ig_local];
    int const g_z = gvz__[ig_local];

    double const phase = twopi * (pos_x * g_x + pos_y * g_y + pos_z * g_z);

    /* negative imaginary part: the stored value is the complex conjugate of (cos(phase), sin(phase)) */
    phase_factors__[array2D_offset(ig_local, atom_idx, num_gvec_loc__)] =
        make_accDoubleComplex(cos(phase), -sin(phase));
}
59
/** \brief Build the conjugated phase-factor table on the device and multiply the density matrix by it.
 *
 *  Step 1 launches generate_phase_factors_conj_gpu_kernel to fill phase_factors__.
 *  Step 2 reinterprets the complex table as a real matrix with 2 * num_gvec_loc__ rows and calls a real
 *  GEMM ('N', 'T'): dm_pw = dm * phase_factors^T, with alpha = 1 and beta = 0.
 *  The stream is synchronized before returning.
 *
 *  Empty input is handled explicitly. A zero-sized grid dimension is an invalid launch configuration
 *  and a zero leading dimension is rejected by BLAS, so those calls are skipped when there is nothing
 *  to compute.
 *
 *  \param [in]  num_atoms__      Number of atoms (inner dimension of the GEMM).
 *  \param [in]  num_gvec_loc__   Number of local G-vectors.
 *  \param [in]  nbf__            Number of basis functions; the GEMM uses nbf * (nbf + 1) / 2 rows
 *                                (presumably a packed triangle of an nbf x nbf block, TODO confirm).
 *  \param [in]  atom_pos__       Device pointer to atom coordinates, forwarded to the kernel.
 *  \param [in]  gvx__            Device pointer to the first integer component of the G-vectors.
 *  \param [in]  gvy__            Device pointer to the second integer component of the G-vectors.
 *  \param [in]  gvz__            Device pointer to the third integer component of the G-vectors.
 *  \param [out] phase_factors__  Device work buffer; written as complex numbers by the kernel and read
 *                                as pairs of doubles by the GEMM.
 *  \param [in]  dm__             Device pointer to the density matrix (left GEMM operand).
 *  \param [out] dm_pw__          Device pointer to the result of the GEMM.
 *  \param [in]  stream_id__      Id of the stream on which the kernel and the GEMM are issued.
 */
extern "C" void generate_dm_pw_gpu(int           num_atoms__,
                                   int           num_gvec_loc__,
                                   int           nbf__,
                                   double const* atom_pos__,
                                   int const*    gvx__,
                                   int const*    gvy__,
                                   int const*    gvz__,
                                   double*       phase_factors__,
                                   double const* dm__,
                                   double*       dm_pw__,
                                   int           stream_id__)
{
    /* output is empty: nothing to launch, but keep the guarantee that the stream is idle on return */
    if (num_gvec_loc__ <= 0 || nbf__ <= 0) {
        acc::sync_stream(stream_id(stream_id__));
        return;
    }

    acc_stream_t stream = (acc_stream_t)acc::stream(stream_id(stream_id__));

    /* number of rows of dm and dm_pw */
    int const nbf_packed = nbf__ * (nbf__ + 1) / 2;
    /* each complex phase factor is seen by the real GEMM as two doubles */
    int const num_gvec_real = num_gvec_loc__ * 2;

    /* a grid with a zero y-dimension cannot be launched; with no atoms the table is not needed */
    if (num_atoms__ > 0) {
        dim3 grid_t(32);
        dim3 grid_b(num_blocks(num_gvec_loc__, grid_t.x), num_atoms__);

        accLaunchKernel((generate_phase_factors_conj_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, stream,
                        num_gvec_loc__,
                        num_atoms__,
                        atom_pos__,
                        gvx__,
                        gvy__,
                        gvz__,
                        (acc_complex_double_t*)phase_factors__
                       );
    }

    double alpha = 1;
    double beta  = 0;

    /* dm_pw(nbf_packed, 2 * num_gvec_loc) = dm(nbf_packed, num_atoms) * phase_factors(2 * num_gvec_loc, num_atoms)^T */
    blas::dgemm('N', 'T', nbf_packed, num_gvec_real, num_atoms__,
                &alpha,
                dm__, nbf_packed,
                phase_factors__, num_gvec_real,
                &beta,
                dm_pw__, nbf_packed,
                stream_id__);
    acc::sync_stream(stream_id(stream_id__));
}
101
Blas functions for execution on GPUs.
Common device functions used by GPU kernels.
Uniform interface to the runtime API of CUDA and ROCm.
Helper class to wrap stream id (integer number).
Definition: acc.hpp:132
Namespace for accelerator-related functions.
Definition: acc.cpp:30
acc_stream_t stream(stream_id sid__)
Return a single device stream.
Definition: acc.hpp:202
void sync_stream(stream_id sid__)
Synchronize a single stream.
Definition: acc.hpp:234
Namespace of the SIRIUS library.
Definition: sirius.f90:5