SIRIUS 7.5.0
Electronic structure library and applications
augmentation_operator.cpp
Go to the documentation of this file.
1// Copyright (c) 2013-2018 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file augmentation_operator.cpp
21 *
22 * \brief Contains implementation of sirius::Augmentation_operator class.
23 */
24
26
27namespace sirius {
28
30{
31 if (!atom_type_.augment()) {
32 return;
33 }
34 PROFILE("sirius::Augmentation_operator::generate_pw_coeffs");
35
36 double fourpi_omega = fourpi / gvec_.omega();
37
38 auto const& tp = gvec_.gvec_tp();
39
40 /* maximum l of beta-projectors */
41 int lmax_beta = atom_type_.indexr().lmax();
42 int lmmax = sf::lmmax(2 * lmax_beta);
43
44 /* number of beta-projectors */
45 int nbf = atom_type_.mt_basis_size();
46 /* only half of Q_{xi,xi'}(G) matrix is stored */
47 int nqlm = nbf * (nbf + 1) / 2;
48 /* local number of G-vectors */
49 int gvec_count = gvec_.count();
50
51 /* Info:
52 * After some tests, the current GPU implementation of generating aug. operator turns out to be slower than CPU.
53 * The reason is probaly the memory access pattern of G, lm and, idxrf indices.
54 * The current decision is to compute aug. operator on CPU once during the initialization and
55 * then copy the chunks of Q(G) to GPU when computing D-operator and augment charge density.
56 */
57 switch (atom_type_.parameters().processing_unit()) {
58 case sddk::device_t::CPU:
59 case sddk::device_t::GPU: {
60 /* Gaunt coefficients of three real spherical harmonics */
61 Gaunt_coefficients<double> gaunt_coefs(lmax_beta, 2 * lmax_beta, lmax_beta, SHT::gaunt_rrr);
62 #pragma omp parallel for
63 for (int igloc = 0; igloc < gvec_count; igloc++) {
64 std::vector<double> rlm(lmmax);
65 sf::spherical_harmonics(2 * lmax_beta, tp(igloc, 0), tp(igloc, 1), rlm.data());
66 std::vector<std::complex<double>> v(lmmax);
67 for (int idx12 = 0; idx12 < nqlm; idx12++) {
68 int lm1 = idx_(0, idx12);
69 int lm2 = idx_(1, idx12);
70 int idxrf12 = idx_(2, idx12);
71 for (int lm3 = 0; lm3 < lmmax; lm3++) {
72 v[lm3] = std::conj(zilm_[lm3]) * rlm[lm3] *
73 ri_values_(idxrf12, l_by_lm_[lm3], gvec_.gvec_shell_idx_local(igloc));
74 }
75 std::complex<double> z = fourpi_omega * gaunt_coefs.sum_L3_gaunt(lm2, lm1, &v[0]);
76 q_pw_(idx12, 2 * igloc) = z.real();
77 q_pw_(idx12, 2 * igloc + 1) = z.imag();
78 }
79 }
80 break;
81 }
82// case sddk::device_t::GPU: {
83//#if defined(SIRIUS_GPU)
84// auto spl_ngv_loc = utils::split_in_blocks(gvec_count, atom_type_.parameters().cfg().control().gvec_chunk_size());
85// auto& mpd = sddk::get_memory_pool(sddk::memory_t::device);
86// /* allocate buffer for Rlm on GPUs */
87// sddk::mdarray<double, 2> gvec_rlm(lmmax, spl_ngv_loc[0], mpd, "gvec_rlm");
88// /* allocate buffer for Q(G) on GPUs */
89// sddk::mdarray<double, 2> qpw(nqlm, 2 * spl_ngv_loc[0], mpd, "qpw");
90//
91// int g_begin{0};
92// /* loop over blocks of G-vectors */
93// for (auto ng : spl_ngv_loc) {
94// /* generate Rlm spherical harmonics */
95// spherical_harmonics_rlm_gpu(2 * lmax_beta, ng, tp.at(sddk::memory_t::device, g_begin, 0),
96// tp.at(sddk::memory_t::device, g_begin, 1), gvec_rlm.at(sddk::memory_t::device), gvec_rlm.ld());
97// this->generate_pw_coeffs_chunk_gpu(g_begin, ng, gvec_rlm.at(sddk::memory_t::device), gvec_rlm.ld(), qpw);
98// acc::copyout(q_pw_.at(sddk::memory_t::host, 0, 2 * g_begin), qpw.at(sddk::memory_t::device), 2 * ng * nqlm);
99// g_begin += ng;
100// }
101//#endif
102// break;
103// }
104 }
105
106 q_mtrx_ = sddk::mdarray<double, 2>(nbf, nbf);
107 q_mtrx_.zero();
108
109 if (gvec_.comm().rank() == 0) {
110 for (int xi2 = 0; xi2 < nbf; xi2++) {
111 for (int xi1 = 0; xi1 <= xi2; xi1++) {
112 /* packed orbital index */
113 int idx12 = packed_index(xi1, xi2);
114 q_mtrx_(xi1, xi2) = q_mtrx_(xi2, xi1) = gvec_.omega() * q_pw_(idx12, 0);
115 }
116 }
117 }
118 /* broadcast from rank#0 */
119 gvec_.comm().bcast(&q_mtrx_(0, 0), nbf * nbf, 0);
120
121 if (env::print_checksum()) {
122 auto cs = q_pw_.checksum();
123 auto cs1 = q_mtrx_.checksum();
124 gvec_.comm().allreduce(&cs, 1);
125 if (gvec_.comm().rank() == 0) {
126 print_checksum("q_pw", cs, std::cout);
127 print_checksum("q_mtrx", cs1, std::cout);
128 }
129 }
130}
131
133{
134 if (!atom_type_.augment()) {
135 return;
136 }
137 PROFILE("sirius::Augmentation_operator::generate_pw_coeffs_gvec_deriv");
138
139 auto const& tp = gvec_.gvec_tp();
140
141 /* maximum l of beta-projectors */
142 int lmax_beta = atom_type_.indexr().lmax();
143 int lmmax = sf::lmmax(2 * lmax_beta);
144
145 /* number of beta-projectors */
146 int nbf = atom_type_.mt_basis_size();
147 /* local number of G-vectors */
148 int gvec_count = gvec_.count();
149
150 switch (atom_type_.parameters().processing_unit()) {
151 case sddk::device_t::GPU:
152 case sddk::device_t::CPU: {
153 /* Gaunt coefficients of three real spherical harmonics */
154 Gaunt_coefficients<double> gaunt_coefs(lmax_beta, 2 * lmax_beta, lmax_beta, SHT::gaunt_rrr);
155 #pragma omp parallel for
156 for (int igloc = 0; igloc < gvec_count; igloc++) {
157 /* index of the G-vector shell */
158 int igsh = gvec_.gvec_shell_idx_local(igloc);
159
160 auto gvc = gvec_.gvec_cart<index_domain_t::local>(igloc);
161 double gvc_nu = gvc[nu__];
162
163 std::vector<double> rlm(lmmax);
165 sf::spherical_harmonics(2 * lmax_beta, tp(igloc, 0), tp(igloc, 1), rlm.data());
166 const bool divide_by_r{false};
167 sf::dRlm_dr(2 * lmax_beta, gvc, rlm_dq, divide_by_r);
168
169 std::vector<std::complex<double>> v(lmmax);
170 for (int idx12 = 0; idx12 < nbf * (nbf + 1) / 2; idx12++) {
171 int lm1 = idx_(0, idx12);
172 int lm2 = idx_(1, idx12);
173 int idxrf12 = idx_(2, idx12);
174 for (int lm3 = 0; lm3 < lmmax; lm3++) {
175 int l = l_by_lm_[lm3];
176 v[lm3] = std::conj(zilm_[lm3]) * (rlm_dq(lm3, nu__) * ri_values_(idxrf12, l, igsh) +
177 rlm[lm3] * ri_dq_values_(idxrf12, l, igsh) * gvc_nu);
178 }
179 std::complex<double> z = fourpi * gaunt_coefs.sum_L3_gaunt(lm2, lm1, &v[0]);
180 q_pw_(idx12, 2 * igloc) = z.real();
181 q_pw_(idx12, 2 * igloc + 1) = z.imag();
182 }
183 }
184 break;
185 }
186 }
187}
188
189} // namespace sirius
Contains implementation of sirius::Augmentation_operator class.
int mt_basis_size() const
Total number of muffin-tin basis functions (APW + LO).
Definition: atom_type.hpp:880
auto const & indexr() const
Return const reference to the index of radial functions.
Definition: atom_type.hpp:817
bool augment() const
Return true if this atom type has an augementation charge.
Definition: atom_type.hpp:569
void generate_pw_coeffs()
Generate Q_{xi,xi'}(G) plane wave coefficients.
void generate_pw_coeffs_gvec_deriv(int nu__)
Generate G-vector derivative Q_{xi,xi'}(G)/dG of the plane-wave coefficients *‍/.
Compact storage of non-zero Gaunt coefficients .
Definition: gaunt.hpp:58
auto sum_L3_gaunt(int lm1, int lm2, std::complex< double > const *v) const
Return a sum over L3 (lm3) index of Gaunt coefficients and a complex vector.
Definition: gaunt.hpp:173
static double gaunt_rrr(int l1, int l2, int l3, int m1, int m2, int m3)
Gaunt coefficent of three real spherical harmonics.
Definition: sht.hpp:423
int count() const
Number of G-vectors for a fine-grained distribution for the current MPI rank.
Definition: gvec.hpp:506
double omega() const
Return the volume of the real space unit cell that corresponds to the reciprocal lattice of G-vectors...
Definition: gvec.hpp:471
r3::vector< double > gvec_cart(int ig__) const
Return G vector in Cartesian coordinates.
Definition: gvec.hpp:574
void bcast(T *buffer__, int count__, int root__) const
Perform buffer broadcast.
void allreduce(T *buffer__, int count__) const
Perform the in-place (the output buffer is used as the input buffer) all-to-all reduction.
int rank() const
Rank of MPI process inside communicator.
void zero(memory_t mem__, size_t idx0__, size_t n__)
Zero n elements starting from idx0.
Definition: memory.hpp:1316
T checksum(size_t idx0__, size_t size__) const
Compute checksum.
Definition: memory.hpp:1262
int lmmax(int lmax)
Maximum number of combinations for a given .
Definition: specfunc.hpp:44
void dRlm_dr(int lmax__, r3::vector< double > &r__, sddk::mdarray< double, 2 > &data__, bool divide_by_r__=true)
Compute the derivatives of real spherical harmonics over the components of cartesian vector.
Definition: specfunc.hpp:512
void spherical_harmonics(int lmax, double theta, double phi, std::complex< double > *ylm)
Optimized implementation of complex spherical harmonics.
Definition: specfunc.hpp:298
Namespace of the SIRIUS library.
Definition: sirius.f90:5
const double fourpi
Definition: constants.hpp:48
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
Definition: math_tools.hpp:165
int packed_index(int i__, int j__)
Pack two indices into one for symmetric matrices.