SIRIUS 7.5.0
Electronic structure library and applications
poisson.cpp
1// Copyright (c) 2013-2017 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file poisson.hpp
21 *
22 * \brief Implementation of the Poisson solver for the full-potential muffin-tin case.
23 */
24
25#include "potential.hpp"
26#include "core/profiler.hpp"
27#include "lapw/sum_fg_fl_yg.hpp"
28
29namespace sirius {
30
31double density_residual_hartree_energy(Density const& rho1__, Density const& rho2__)
32{
33 double eh{0};
34 auto const& gv = rho1__.ctx().gvec();
35 #pragma omp parallel for reduction(+:eh)
36 for (int igloc = gv.skip_g0(); igloc < gv.count(); igloc++) {
37 auto z = rho1__.component(0).rg().f_pw_local(igloc) - rho2__.component(0).rg().f_pw_local(igloc);
38 double g = gv.gvec_len<index_domain_t::local>(igloc);
39 eh += (std::pow(z.real(), 2) + std::pow(z.imag(), 2)) / std::pow(g, 2);
40 }
41 gv.comm().allreduce(&eh, 1);
42 eh *= twopi * rho1__.ctx().unit_cell().omega();
43 if (gv.reduced()) {
44 eh *= 2;
45 }
46 return eh;
47}
48
49void Potential::poisson_add_pseudo_pw(sddk::mdarray<std::complex<double>, 2>& qmt__,
50 sddk::mdarray<std::complex<double>, 2>& qit__,
51 std::complex<double>* rho_pw__)
52{
53 PROFILE("sirius::Potential::poisson_add_pseudo_pw");
54
55 int lmmax = ctx_.lmmax_rho();
56 int ngv = ctx_.gvec().count();
57
58 /* The following term is added to the plane-wave coefficients of the charge density:
59 * Integrate[SphericalBesselJ[l,a*x]*p[x,R]*x^2,{x,0,R},Assumptions->{l>=0,n>=0,R>0,a>0}] /
60 * Integrate[p[x,R]*x^(2+l),{x,0,R},Assumptions->{h>=0,n>=0,R>0}]
61 * i.e. contributon from pseudodensity to l-th channel of plane wave expansion multiplied by
62 * the difference bethween true and interstitial-in-the-mt multipole moments and divided by the
63 * moment of the pseudodensity.
64 */
65 for (int iat = 0; iat < unit_cell_.num_atom_types(); iat++) {
66 double R = unit_cell_.atom_type(iat).mt_radius();
67 int na = unit_cell_.atom_type(iat).num_atoms();
68
72
73 switch (ctx_.processing_unit()) {
74 case sddk::device_t::CPU: {
75 auto& mp = get_memory_pool(sddk::memory_t::host);
76 pf = sddk::mdarray<std::complex<double>, 2>(ngv, na, mp);
78 qapf = sddk::mdarray<std::complex<double>, 2>(lmmax, ngv, mp);
79 break;
80 }
81 case sddk::device_t::GPU: {
82 auto& mp = get_memory_pool(sddk::memory_t::host);
83 auto& mpd = get_memory_pool(sddk::memory_t::device);
84 /* allocate on GPU */
85 pf = sddk::mdarray<std::complex<double>, 2>(nullptr, ngv, na);
86 pf.allocate(mpd);
87 /* allocate on CPU & GPU */
89 qa.allocate(mpd);
90 /* allocate on CPU & GPU */
91 qapf = sddk::mdarray<std::complex<double>, 2>(lmmax, ngv, mp);
92 qapf.allocate(mpd);
93 break;
94 }
95 }
96
97 ctx_.generate_phase_factors(iat, pf);
98
99 for (int i = 0; i < unit_cell_.atom_type(iat).num_atoms(); i++) {
100 int ia = unit_cell_.atom_type(iat).atom_id(i);
101 for (int lm = 0; lm < ctx_.lmmax_rho(); lm++) {
102 qa(lm, i) = qmt__(lm, ia) - qit__(lm, ia);
103 }
104 }
105
106 switch (ctx_.processing_unit()) {
107 case sddk::device_t::CPU: {
108 la::wrap(la::lib_t::blas).gemm('N', 'C', ctx_.lmmax_rho(), ctx_.gvec().count(),
109 unit_cell_.atom_type(iat).num_atoms(), &la::constant<std::complex<double>>::one(),
110 qa.at(sddk::memory_t::host), qa.ld(), pf.at(sddk::memory_t::host), pf.ld(),
111 &la::constant<std::complex<double>>::zero(), qapf.at(sddk::memory_t::host), qapf.ld());
112 break;
113 }
114 case sddk::device_t::GPU: {
115 qa.copy_to(sddk::memory_t::device);
116 la::wrap(la::lib_t::gpublas).gemm('N', 'C', ctx_.lmmax_rho(), ctx_.gvec().count(),
117 unit_cell_.atom_type(iat).num_atoms(), &la::constant<std::complex<double>>::one(),
118 qa.at(sddk::memory_t::device), qa.ld(), pf.at(sddk::memory_t::device), pf.ld(),
119 &la::constant<std::complex<double>>::zero(), qapf.at(sddk::memory_t::device), qapf.ld());
120 qapf.copy_to(sddk::memory_t::host);
121 break;
122 }
123 }
124
125 double fourpi_omega = fourpi / unit_cell_.omega();
126
127 /* add pseudo_density to interstitial charge density so that rho(G) has the correct
128 * multipole moments in the muffin-tins */
129 #pragma omp parallel for schedule(static)
130 for (int igloc = ctx_.gvec().skip_g0(); igloc < ctx_.gvec().count(); igloc++) {
131 double gR = ctx_.gvec().gvec_len<index_domain_t::local>(igloc) * R;
132 double gRn = std::pow(2.0 / gR, pseudo_density_order_ + 1);
133
134 std::complex<double> rho_G(0, 0);
135 /* loop over atoms of the same type */
136 for (int l = 0, lm = 0; l <= ctx_.lmax_rho(); l++) {
137 std::complex<double> zt1(0, 0);
138 for (int m = -l; m <= l; m++, lm++) {
139 zt1 += gvec_ylm_(lm, igloc) * qapf(lm, igloc);
140 }
141 rho_G += fourpi_omega * std::conj(zil_[l]) * zt1 * gamma_factors_R_(l, iat) *
142 sbessel_mt_(l + pseudo_density_order_ + 1, igloc, iat) * gRn;
143 }
144 rho_pw__[igloc] += rho_G;
145 }
146 /* for G=0 case */
147 if (ctx_.comm().rank() == 0) {
148 std::complex<double> rho_G(0, 0);
149 for (int i = 0; i < unit_cell_.atom_type(iat).num_atoms(); i++) {
150 int ia = unit_cell_.atom_type(iat).atom_id(i);
151 rho_G += fourpi_omega * y00 * (qmt__(0, ia) - qit__(0, ia));
152 }
153 rho_pw__[0] += rho_G;
154 }
155 }
156}
157
159{
160 PROFILE("sirius::Potential::poisson");
161
162 /* in case of full potential we need to do pseudo-charge multipoles */
163 if (ctx_.full_potential()) {
164
165 /* true multipole moments */
166 auto qmt = poisson_vmt(rho);
167
168 if (env::print_checksum()) {
169 print_checksum("qmt", qmt.checksum(), ctx_.out());
170 }
171
172 /* compute multipoles of interstitial density in MT region */
173 auto qit = sum_fg_fl_yg(ctx_, ctx_.lmax_rho(), &rho.rg().f_pw_local(0), sbessel_mom_, gvec_ylm_);
174
175 if (env::print_checksum()) {
176 print_checksum("qit", qit.checksum(), ctx_.out());
177 }
178
179 /* add contribution from the pseudo-charge */
180 poisson_add_pseudo_pw(qmt, qit, const_cast<std::complex<double>*>(&rho.rg().f_pw_local(0)));
181
182 if (ctx_.cfg().control().verification() >= 2) {
183 auto qit = sum_fg_fl_yg(ctx_, ctx_.lmax_rho(), &rho.rg().f_pw_local(0), sbessel_mom_, gvec_ylm_);
184
185 double d = 0.0;
186 for (int ia = 0; ia < unit_cell_.num_atoms(); ia++) {
187 for (int lm = 0; lm < ctx_.lmmax_rho(); lm++) {
188 d += std::abs(qmt(lm, ia) - qit(lm, ia));
189 }
190 }
191 if (ctx_.verbosity() >= 1) {
192 RTE_OUT(ctx_.out()) << "pseudocharge error: " << d << std::endl;
193 }
194 }
195 }
196
197 /* compute pw coefficients of Hartree potential */
198 if (ctx_.gvec().comm().rank() == 0) {
199 hartree_potential_->rg().f_pw_local(0) = 0.0;
200 }
201 if (!ctx_.molecule()) {
202 #pragma omp parallel for
203 for (int igloc = ctx_.gvec().skip_g0(); igloc < ctx_.gvec().count(); igloc++) {
204 hartree_potential_->rg().f_pw_local(igloc) = fourpi * rho.rg().f_pw_local(igloc) /
205 std::pow(ctx_.gvec().gvec_len<index_domain_t::local>(igloc), 2);
206 }
207 } else {
208 /* reference paper:
209
210 Supercell technique for total-energy calculations of finite charged and polar systems
211 M. R. Jarvis, I. D. White, R. W. Godby, and M. C. Payne
212 Phys. Rev. B 56, 14972 – Published 15 December 1997
213 DOI:https://doi.org/10.1103/PhysRevB.56.14972
214 */
215 double R_cut = 0.5 * std::pow(unit_cell_.omega(), 1.0 / 3);
216 #pragma omp parallel for
217 for (int igloc = ctx_.gvec().skip_g0(); igloc < ctx_.gvec().count(); igloc++) {
218 auto glen = ctx_.gvec().gvec_len<index_domain_t::local>(igloc);
219 hartree_potential_->rg().f_pw_local(igloc) = (fourpi * rho.rg().f_pw_local(igloc) / std::pow(glen, 2)) *
220 (1.0 - std::cos(glen * R_cut));
221 }
222 }
223
224 //if (ctx_.control().print_checksum_) {
225 // auto z4 = mdarray<std::complex<double>, 1>(&vh->f_pw(0), ctx_.gvec().num_gvec()).checksum();
226 // if (ctx_.comm().rank() == 0) {
227 // DUMP("checksum(vh_pw): %20.14f %20.14f", z4.real(), z4.imag());
228 // }
229 //}
230
231 /* boundary condition for muffin-tins */
232 if (ctx_.full_potential()) {
233 /* compute V_lm at the MT boundary */
234 auto vmtlm = sum_fg_fl_yg(ctx_, ctx_.lmax_pot(), &hartree_potential_->rg().f_pw_local(0), sbessel_mt_, gvec_ylm_);
235
236 /* add boundary condition and convert to Rlm */
237 PROFILE("sirius::Potential::poisson|bc");
239 for (int iat = 0; iat < unit_cell_.num_atom_types(); iat++) {
240 int nmtp = unit_cell_.atom_type(iat).num_mt_points();
241 double R = unit_cell_.atom_type(iat).mt_radius();
242
243 #pragma omp parallel for default(shared)
244 for (int l = 0; l <= ctx_.lmax_pot(); l++) {
245 for (int ir = 0; ir < nmtp; ir++) {
246 rRl(ir, l, iat) = std::pow(unit_cell_.atom_type(iat).radial_grid(ir) / R, l);
247 }
248 }
249 }
250
251 for (auto it : unit_cell_.spl_num_atoms()) {
252 int nmtp = unit_cell_.atom(it.i).num_mt_points();
253
254 std::vector<double> vlm(ctx_.lmmax_pot());
255 SHT::convert(ctx_.lmax_pot(), &vmtlm(0, it.i), &vlm[0]);
256
257 #pragma omp parallel for default(shared)
258 for (int lm = 0; lm < ctx_.lmmax_pot(); lm++) {
259 int l = l_by_lm_[lm];
260
261 for (int ir = 0; ir < nmtp; ir++) {
262 hartree_potential_->mt()[it.i](lm, ir) += vlm[lm] * rRl(ir, l, unit_cell_.atom(it.i).type_id());
263 }
264 }
265 /* save electronic part of the potential at the point of origin */
266#ifdef __VHA_AUX
267 vh_el_(it.i) = y00 * hartree_potential_->mt()[it.i](0, 0) +
268 unit_cell_.atom(it.i).zn() / unit_cell_.atom(it.i).radial_grid(0);
269#else
270 vh_el_(it.i) = y00 * hartree_potential_->mt()[it.i](0, 0);
271#endif
272 }
273 ctx_.comm().allgather(vh_el_.at(sddk::memory_t::host), unit_cell_.spl_num_atoms().local_size(),
274 unit_cell_.spl_num_atoms().global_offset());
275 }
276
277 /* transform Hartree potential to real space */
278 hartree_potential_->rg().fft_transform(1);
279
280 if (env::print_checksum()) {
281 auto cs = hartree_potential_->rg().checksum_rg();
282 auto cs1 = hartree_potential_->rg().checksum_pw();
283 print_checksum("vha_rg", cs, ctx_.out());
284 print_checksum("vha_pw", cs1, ctx_.out());
285 }
286
287 /* compute contribution from the smooth part of Hartree potential */
288 energy_vha_ = sirius::inner(rho, hartree_potential());
289
290#ifndef __VHA_AUX
291 /* add nucleus potential and contribution to Hartree energy */
292 if (ctx_.full_potential()) {
293 double evha_nuc{0};
294 for (auto it : unit_cell_.spl_num_atoms()) {
295 auto& atom = unit_cell_.atom(it.i);
296 Spline<double> srho(atom.radial_grid());
297 for (int ir = 0; ir < atom.num_mt_points(); ir++) {
298 double r = atom.radial_grid(ir);
299 hartree_potential_->mt()[it.i](0, ir) -= atom.zn() / r / y00;
300 srho(ir) = rho.mt()[it.i](0, ir) * r;
301 }
302 evha_nuc -= atom.zn() * srho.interpolate().integrate(0) / y00;
303 }
304 ctx_.comm().allreduce(&evha_nuc, 1);
305 energy_vha_ += evha_nuc;
306 }
307#endif
308
309 /* check values at MT boundary */
310 //if (true) {
311 // /* compute V_lm at the MT boundary */
312 // auto vmtlm = ctx_.sum_fg_fl_yg(ctx_.lmax_pot(), &hartree_potential_->f_pw_local(0), sbessel_mt_, gvec_ylm_);
313
314 // for (int ialoc = 0; ialoc < unit_cell_.spl_num_atoms().local_size(); ialoc++) {
315 // int ia = unit_cell_.spl_num_atoms(ialoc);
316 // int nmtp = unit_cell_.atom(ia).num_mt_points();
317
318 // std::vector<double> vlm(ctx_.lmmax_pot());
319 // SHT::convert(ctx_.lmax_pot(), &vmtlm(0, ia), &vlm[0]);
320
321 // for (int lm = 0; lm < ctx_.lmmax_pot(); lm++) {
322 // printf("ia=%i lm=%i vlmdiff=%20.16f\n", ia, lm,
323 // std::abs(hartree_potential_->f_mt<index_domain_t::local>(lm, nmtp - 1, ialoc) - vlm[lm]));
324 // }
325 // }
326
327 //}
328}
329
330} // namespace sirius
int atom_id(int idx) const
Return atom ID (global index) by the index of atom within a given type.
Definition: atom_type.hpp:940
int num_mt_points() const
Return number of muffin-tin radial grid points.
Definition: atom_type.hpp:718
double mt_radius() const
Return muffin-tin radius.
Definition: atom_type.hpp:712
int num_atoms() const
Return number of atoms of a given type.
Definition: atom_type.hpp:934
int type_id() const
Return atom type id.
Definition: atom.hpp:336
Representation of the periodical function on the muffin-tin geometry.
auto & mt()
Return reference to spherical functions component.
auto & rg()
Return reference to regular space grid component.
void poisson(Periodic_function< double > const &rho)
Poisson solver.
Definition: poisson.cpp:158
std::unique_ptr< Periodic_function< double > > hartree_potential_
Hartree potential.
Definition: potential.hpp:55
Unit_cell & unit_cell_
Alias to unit cell.
Definition: potential.hpp:49
sddk::mdarray< double, 1 > vh_el_
Electronic part of Hartree potential.
Definition: potential.hpp:101
auto poisson_vmt(Periodic_function< double > const &rho__) const
Compute MT part of the potential and MT multipole moments.
Definition: potential.hpp:160
sddk::mdarray< double, 3 > sbessel_mom_
Moments of the spherical Bessel functions.
Definition: potential.hpp:79
void poisson_add_pseudo_pw(sddk::mdarray< std::complex< double >, 2 > &qmt__, sddk::mdarray< std::complex< double >, 2 > &qit__, std::complex< double > *rho_pw__)
Add contribution from the pseudocharge to the plane-wave expansion.
Definition: poisson.cpp:49
static void convert(int lmax__, double const *f_rlm__, std::complex< double > *f_ylm__)
Convert form Rlm to Ylm representation.
Definition: sht.hpp:264
auto const & gvec() const
Return const reference to Gvec object.
void generate_phase_factors(int iat__, sddk::mdarray< std::complex< double >, 2 > &phase_factors__) const
Generate phase factors for all atoms of a given type.
std::ostream & out() const
Return output stream.
mpi::Communicator const & comm() const
Total communicator of the simulation.
Spline< T, U > & interpolate()
Definition: spline.hpp:275
double omega() const
Unit cell volume.
Definition: unit_cell.hpp:275
int max_num_mt_points() const
Maximum number of muffin-tin points among all atom types.
Definition: unit_cell.hpp:397
Atom const & atom(int id__) const
Return const atom instance by id.
Definition: unit_cell.hpp:344
int num_atom_types() const
Number of atom types.
Definition: unit_cell.hpp:281
Atom_type & atom_type(int id__)
Return atom type instance by id.
Definition: unit_cell.hpp:288
int num_atoms() const
Number of atoms in the unit cell.
Definition: unit_cell.hpp:338
void gemm(char transa, char transb, ftn_int m, ftn_int n, ftn_int k, T const *alpha, T const *A, ftn_int lda, T const *B, ftn_int ldb, T const *beta, T *C, ftn_int ldc, acc::stream_id sid=acc::stream_id(-1)) const
General matrix-matrix multiplication.
void allgather(T *buffer__, int const *recvcounts__, int const *displs__) const
In-place MPI_Allgatherv.
void allreduce(T *buffer__, int count__) const
Perform the in-place (the output buffer is used as the input buffer) all-to-all reduction.
int rank() const
Rank of MPI process inside communicator.
Multidimensional array with the column-major (Fortran) order.
Definition: memory.hpp:660
void copy_to(memory_t mem__, size_t idx0__, size_t n__, acc::stream_id sid=acc::stream_id(-1))
Copy n elements starting from idx0 from one memory type to another.
Definition: memory.hpp:1339
uint32_t ld() const
Return leading dimension size.
Definition: memory.hpp:1233
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
Definition: memory.hpp:1057
@ gpublas
GPU BLAS (cuBlas or ROCblas)
int lmmax(int lmax)
Maximum number of combinations for a given .
Definition: specfunc.hpp:44
int lm(int l, int m)
Get composite lm index by angular index l and azimuthal index m.
Definition: specfunc.hpp:50
Namespace of the SIRIUS library.
Definition: sirius.f90:5
const double y00
First spherical harmonic .
Definition: constants.hpp:51
const double twopi
Definition: constants.hpp:45
const double fourpi
Definition: constants.hpp:48
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
Definition: math_tools.hpp:165
auto sum_fg_fl_yg(Simulation_context const &ctx__, int lmax__, std::complex< double > const *fpw__, sddk::mdarray< double, 3 > &fl__, sddk::matrix< std::complex< double > > &gvec_ylm__)
Contains declaration and partial implementation of sirius::Potential class.
A time-based profiler.
LAPW specific function to compute sum over plane-wave coefficients and spherical harmonics.