SIRIUS 7.5.0
Electronic structure library and applications
stress.cpp
Go to the documentation of this file.
1// Copyright (c) 2013-2019 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file stress.cpp
21 *
22 * \brief Contains implementation of sirius::Stress class.
23 */
24
25#include "core/r3/r3.hpp"
26#include "core/profiler.hpp"
27#include "k_point/k_point.hpp"
28#include "non_local_functor.hpp"
29#include "dft/energy.hpp"
31#include "stress.hpp"
32
33namespace sirius {
34
35template <typename T, typename F>
36void
38{
39 PROFILE("sirius::Stress|nonloc");
40
41 sddk::mdarray<real_type<F>, 2> collect_result(9, ctx_.unit_cell().num_atoms());
42 collect_result.zero();
43
44 stress_nonloc_.zero();
45
46 /* if there are no beta projectors then get out there */
47 if (ctx_.unit_cell().max_mt_basis_size() == 0) {
48 return;
49 }
50
51 print_memory_usage(ctx_.out(), FILE_LINE);
52
53 for (auto it : kset_.spl_num_kpoints()) {
54 auto kp = kset_.get<T>(it.i);
55 auto mem = ctx_.processing_unit_memory_t();
56 auto mg = kp->spinor_wave_functions().memory_guard(mem, wf::copy_to::device);
57 Beta_projectors_strain_deriv<T> bp_strain_deriv(ctx_, kp->gkvec());
58
59 add_k_point_contribution_nonlocal<T, F>(ctx_, bp_strain_deriv, *kp, collect_result);
60 }
61
62 #pragma omp parallel
63 {
64 r3::matrix<double> tmp_stress; // TODO: test pragma omp parallel for reduction(+:stress)
65
66 #pragma omp for
67 for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
68 for (int i = 0; i < 3; i++) {
69 for (int j = 0; j < 3; j++) {
70 tmp_stress(i, j) -= collect_result(j * 3 + i, ia);
71 }
72 }
73 }
74
75 #pragma omp critical
76 stress_nonloc_ += tmp_stress;
77 }
78
79 ctx_.comm().allreduce(&stress_nonloc_(0, 0), 9);
80
81 stress_nonloc_ *= (1.0 / ctx_.unit_cell().omega());
82
83 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_nonloc_);
84}
85
87Stress::calc_stress_total()
88{
89 calc_stress_kin();
96 calc_stress_nonloc();
97 stress_hubbard_.zero();
98 if (ctx_.hubbard_correction()) {
99 calc_stress_hubbard();
100 }
101 stress_total_.zero();
102
103 for (int mu = 0; mu < 3; mu++) {
104 for (int nu = 0; nu < 3; nu++) {
105 stress_total_(mu, nu) = stress_kin_(mu, nu) + stress_har_(mu, nu) + stress_ewald_(mu, nu) +
106 stress_vloc_(mu, nu) + stress_core_(mu, nu) + stress_xc_(mu, nu) +
107 stress_us_(mu, nu) + stress_nonloc_(mu, nu) + stress_hubbard_(mu, nu);
108 }
109 }
110 return stress_total_;
111}
112
113r3::matrix<double>
114Stress::calc_stress_hubbard()
115{
116 stress_hubbard_.zero();
117 auto r = ctx_.unit_cell().num_hubbard_wf();
118 /* if there are no beta projectors then get out there */
119 /* TODO : Need to fix the case where pp have no beta projectors */
120 if (ctx_.unit_cell().max_mt_basis_size() == 0) {
121 RTE_THROW("Hubbard forces : Your pseudo potentials do not have beta projectors. This need a proper fix");
122 return stress_hubbard_;
123 }
124
125 Q_operator<double> q_op(ctx_);
126
127 auto nhwf = ctx_.unit_cell().num_hubbard_wf().first;
128
129 sddk::mdarray<std::complex<double>, 4> dn(nhwf, nhwf, 2, 9);
131 dn.allocate(ctx_.processing_unit_memory_t());
132 }
133 for (auto it : kset_.spl_num_kpoints()) {
134 auto kp = kset_.get<double>(it.i);
135 dn.zero();
137 dn.zero(ctx_.processing_unit_memory_t());
138 }
139 // kp->beta_projectors().prepare();
140 auto mg1 = kp->spinor_wave_functions().memory_guard(ctx_.processing_unit_memory_t(), wf::copy_to::device);
141 auto mg2 = kp->hubbard_wave_functions_S().memory_guard(ctx_.processing_unit_memory_t(), wf::copy_to::device);
142 auto mg3 = kp->atomic_wave_functions().memory_guard(ctx_.processing_unit_memory_t(), wf::copy_to::device);
143 auto mg4 = kp->atomic_wave_functions_S().memory_guard(ctx_.processing_unit_memory_t(), wf::copy_to::device);
144
145 if (ctx_.num_mag_dims() == 3) {
146 RTE_THROW("Hubbard stress correction is only implemented for the simple hubbard correction.");
147 }
148
149 /* compute the derivative of the occupancies numbers */
150 potential_.U().compute_occupancies_stress_derivatives(*kp, q_op, dn);
151 for (int dir1 = 0; dir1 < 3; dir1++) {
152 for (int dir2 = 0; dir2 < 3; dir2++) {
153 for (int at_lvl = 0; at_lvl < static_cast<int>(potential_.hubbard_potential().local().size());
154 at_lvl++) {
155 const int ia1 = potential_.hubbard_potential().atomic_orbitals(at_lvl).first;
156 const auto& atom = ctx_.unit_cell().atom(ia1);
157 const int lo = potential_.hubbard_potential().atomic_orbitals(at_lvl).second;
158 if (atom.type().lo_descriptor_hub(lo).use_for_calculation()) {
159 const int lmax_at = 2 * atom.type().lo_descriptor_hub(lo).l() + 1;
160 const int offset = potential_.hubbard_potential().offset(at_lvl);
161 for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
162 for (int m1 = 0; m1 < lmax_at; m1++) {
163 for (int m2 = 0; m2 < lmax_at; m2++) {
164 stress_hubbard_(dir1, dir2) -=
165 (potential_.hubbard_potential().local(at_lvl)(m2, m1, ispn) *
166 dn(offset + m1, offset + m2, ispn, dir1 + 3 * dir2))
167 .real() /
168 ctx_.unit_cell().omega();
169 }
170 }
171 }
172 }
173 }
174
175 double d = 0;
176 for (int i = 0; i < ctx_.cfg().hubbard().nonlocal().size(); i++) {
177 auto nl = ctx_.cfg().hubbard().nonlocal(i);
178 int ia = nl.atom_pair()[0];
179 int ja = nl.atom_pair()[1];
180 int il = nl.l()[0];
181 int jl = nl.l()[1];
182 int in = nl.n()[0];
183 int jn = nl.n()[1];
184 auto Tr = nl.T();
185
186 auto z1 = std::exp(std::complex<double>(0, -twopi * dot(r3::vector<int>(Tr), kp->vk())));
187 const int at_lvl1 = potential_.hubbard_potential().find_orbital_index(ia, in, il);
188 const int at_lvl2 = potential_.hubbard_potential().find_orbital_index(ja, jn, jl);
189 const int offset1 = potential_.hubbard_potential().offset(at_lvl1);
190 const int offset2 = potential_.hubbard_potential().offset(at_lvl2);
191
192 for (int is = 0; is < ctx_.num_spins(); is++) {
193 for (int m2 = 0; m2 < 2 * jl + 1; m2++) {
194 for (int m1 = 0; m1 < 2 * il + 1; m1++) {
195 auto result1_ = z1 * std::conj(dn(offset2 + m2, offset1 + m1, is, dir1 + 3 * dir2)) *
196 potential_.hubbard_potential().nonlocal(i)(m1, m2, is);
197 d += std::real(result1_);
198 }
199 }
200 }
201 }
202 stress_hubbard_(dir1, dir2) -= d / ctx_.unit_cell().omega();
203 }
204 }
205 }
206
207 /* global reduction */
208 kset_.comm().allreduce(&stress_hubbard_(0, 0), 9);
209 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_hubbard_);
210
211 stress_hubbard_ = -1.0 * stress_hubbard_;
212
213 return stress_hubbard_;
214}
215
216r3::matrix<double>
218{
219 stress_core_.zero();
220
221 bool empty = true;
222
223 /* check if the core atomic wave functions are set up or not */
224 /* if not then the core correction is simply ignored */
225
226 for (int ia = 0; (ia < ctx_.unit_cell().num_atoms()) && empty; ia++) {
227 Atom& atom = ctx_.unit_cell().atom(ia);
228 if (!atom.type().ps_core_charge_density().empty()) {
229 empty = false;
230 }
231 }
232
233 if (empty) {
234 return stress_core_;
235 }
236
237 potential_.xc_potential().rg().fft_transform(-1);
238
239 auto q = ctx_.gvec().shells_len();
240 auto const ff = ctx_.ri().ps_core_djl_->values(q, ctx_.comm());
241 auto drhoc = make_periodic_function<index_domain_t::local>(ctx_.unit_cell(), ctx_.gvec(),
242 ctx_.phase_factors_t(), ff);
243
244 double sdiag{0};
245 int ig0 = ctx_.gvec().skip_g0();
246
247 for (int igloc = ig0; igloc < ctx_.gvec().count(); igloc++) {
248 auto G = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);
249 auto g = G.length();
250
251 for (int mu : {0, 1, 2}) {
252 for (int nu : {0, 1, 2}) {
253 stress_core_(mu, nu) -=
254 std::real(std::conj(potential_.xc_potential().rg().f_pw_local(igloc)) * drhoc[igloc]) *
255 G[mu] * G[nu] / g;
256 }
257 }
258
259 sdiag += std::real(std::conj(potential_.xc_potential().rg().f_pw_local(igloc)) *
260 density_.rho_pseudo_core().f_pw_local(igloc));
261 }
262
263 if (ctx_.gvec().reduced()) {
264 stress_core_ *= 2;
265 sdiag *= 2;
266 }
267 if (ctx_.comm().rank() == 0) {
268 sdiag +=
269 std::real(std::conj(potential_.xc_potential().rg().f_pw_local(0)) * density_.rho_pseudo_core().f_pw_local(0));
270 }
271
272 for (int mu : {0, 1, 2}) {
273 stress_core_(mu, mu) -= sdiag;
274 }
275
276 ctx_.comm().allreduce(&stress_core_(0, 0), 9);
277
278 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_core_);
279
280 return stress_core_;
281}
282
285{
286 stress_xc_.zero();
287
288 double e = sirius::energy_exc(density_, potential_) - sirius::energy_vxc(density_, potential_) -
289 sirius::energy_bxc(density_, potential_);
290
291 for (int l = 0; l < 3; l++) {
292 stress_xc_(l, l) = e / ctx_.unit_cell().omega();
293 }
294
295 if (potential_.is_gradient_correction()) {
296
298
299 /* factor 2 in the expression for gradient correction comes from the
300 derivative of sigm (which is grad(rho) * grad(rho)) */
301
302 if (ctx_.num_spins() == 1) {
303 Smooth_periodic_function<double> rhovc(ctx_.spfft<double>(), ctx_.gvec_fft_sptr());
304 rhovc.zero();
305 rhovc += density_.rho().rg();
306 rhovc += density_.rho_pseudo_core();
307
308 /* transform to PW domain */
309 rhovc.fft_transform(-1);
310
311 /* generate pw coeffs of the gradient */
312 auto grad_rho = gradient(rhovc);
313
314 /* gradient in real space */
315 for (int x : {0, 1, 2}) {
316 grad_rho[x].fft_transform(1);
317 }
318
319 for (int irloc = 0; irloc < ctx_.spfft<double>().local_slice_size(); irloc++) {
320 for (int mu = 0; mu < 3; mu++) {
321 for (int nu = 0; nu < 3; nu++) {
322 t(mu, nu) +=
323 2 * grad_rho[mu].value(irloc) * grad_rho[nu].value(irloc) * potential_.vsigma(0).value(irloc);
324 }
325 }
326 }
327 } else {
328 auto result = get_rho_up_dn<true>(density_);
329 auto& rho_up = *result[0];
330 auto& rho_dn = *result[1];
331
332 /* transform to PW domain */
333 rho_up.fft_transform(-1);
334 rho_dn.fft_transform(-1);
335
336 /* generate pw coeffs of the gradient */
337 auto grad_rho_up = gradient(rho_up);
338 auto grad_rho_dn = gradient(rho_dn);
339
340 /* gradient in real space */
341 for (int x : {0, 1, 2}) {
342 grad_rho_up[x].fft_transform(1);
343 grad_rho_dn[x].fft_transform(1);
344 }
345
346 for (int irloc = 0; irloc < ctx_.spfft<double>().local_slice_size(); irloc++) {
347 for (int mu = 0; mu < 3; mu++) {
348 for (int nu = 0; nu < 3; nu++) {
349 t(mu, nu) += grad_rho_up[mu].value(irloc) * grad_rho_up[nu].value(irloc) * 2 *
350 potential_.vsigma(0).value(irloc) +
351 (grad_rho_up[mu].value(irloc) * grad_rho_dn[nu].value(irloc) +
352 grad_rho_dn[mu].value(irloc) * grad_rho_up[nu].value(irloc)) *
353 potential_.vsigma(1).value(irloc) +
354 grad_rho_dn[mu].value(irloc) * grad_rho_dn[nu].value(irloc) * 2 *
355 potential_.vsigma(2).value(irloc);
356 }
357 }
358 }
359 }
360 mpi::Communicator(ctx_.spfft<double>().communicator()).allreduce(&t(0, 0), 9);
361 t *= (-1.0 / ctx_.fft_grid().num_points());
362 stress_xc_ += t;
363 }
364
365 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_xc_);
366
367 return stress_xc_;
368}
369
372{
373 PROFILE("sirius::Stress|us");
374
375 stress_us_.zero();
376
377 /* check if we have beta projectors. Only for pseudo potentials */
378 if (ctx_.unit_cell().max_mt_basis_size() == 0) {
379 return stress_us_;
380 }
381
382 potential_.fft_transform(-1);
383
385 sddk::memory_t qmem{sddk::memory_t::none};
386
387 sddk::memory_pool* mp{nullptr};
388 switch (ctx_.processing_unit()) {
389 case sddk::device_t::CPU: {
390 mp = &get_memory_pool(sddk::memory_t::host);
391 la = la::lib_t::blas;
392 qmem = sddk::memory_t::host;
393 break;
394 }
395 case sddk::device_t::GPU: {
396 mp = &get_memory_pool(sddk::memory_t::host_pinned);
397 la = la::lib_t::spla;
398 qmem = sddk::memory_t::host;
399 break;
400 }
401 }
402
403 for (int iat = 0; iat < ctx_.unit_cell().num_atom_types(); iat++) {
404 auto& atom_type = ctx_.unit_cell().atom_type(iat);
405 if (!atom_type.augment() || atom_type.num_atoms() == 0) {
406 continue;
407 }
408
409 Augmentation_operator q_deriv(ctx_.unit_cell().atom_type(iat), ctx_.gvec(), *ctx_.ri().aug_,
410 *ctx_.ri().aug_djl_);
411
412 auto nbf = atom_type.mt_basis_size();
413
414 /* get auxiliary density matrix */
415 auto dm = density_.density_matrix_aux(atom_type);
416
417 sddk::mdarray<std::complex<double>, 2> phase_factors(atom_type.num_atoms(), ctx_.gvec().count(),
418 get_memory_pool(sddk::memory_t::host));
419
420 PROFILE_START("sirius::Stress|us|phase_fac");
421 #pragma omp parallel for
422 for (int igloc = 0; igloc < ctx_.gvec().count(); igloc++) {
423 int ig = ctx_.gvec().offset() + igloc;
424 for (int i = 0; i < atom_type.num_atoms(); i++) {
425 phase_factors(i, igloc) = ctx_.gvec_phase_factor(ig, atom_type.atom_id(i));
426 }
427 }
428 PROFILE_STOP("sirius::Stress|us|phase_fac");
429
430 sddk::mdarray<double, 2> v_tmp(atom_type.num_atoms(), ctx_.gvec().count() * 2, *mp);
431 sddk::mdarray<double, 2> tmp(nbf * (nbf + 1) / 2, atom_type.num_atoms(), *mp);
432 /* over spin components, can be from 1 to 4 */
433 for (int nu = 0; nu < 3; nu++) {
434 /* generate dQ(G)/dG */
436 for (int ispin = 0; ispin < ctx_.num_mag_dims() + 1; ispin++) {
437 for (int mu = 0; mu < 3; mu++) {
438 PROFILE_START("sirius::Stress|us|prepare");
439 int igloc0{0};
440 if (ctx_.comm().rank() == 0) {
441 for (int ia = 0; ia < atom_type.num_atoms(); ia++) {
442 v_tmp(ia, 0) = v_tmp(ia, 1) = 0;
443 }
444 igloc0 = 1;
445 }
446 #pragma omp parallel for
447 for (int igloc = igloc0; igloc < ctx_.gvec().count(); igloc++) {
448 auto gvc = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);
449 double g = gvc.length();
450
451 for (int ia = 0; ia < atom_type.num_atoms(); ia++) {
452 auto z = phase_factors(ia, igloc) * potential_.component(ispin).rg().f_pw_local(igloc) *
453 (-gvc[mu] / g);
454 v_tmp(ia, 2 * igloc) = z.real();
455 v_tmp(ia, 2 * igloc + 1) = z.imag();
456 }
457 }
458 PROFILE_STOP("sirius::Stress|us|prepare");
459
460 PROFILE_START("sirius::Stress|us|gemm");
461 la::wrap(la).gemm('N', 'T', nbf * (nbf + 1) / 2, atom_type.num_atoms(), 2 * ctx_.gvec().count(),
462 &la::constant<double>::one(), q_deriv.q_pw().at(qmem), q_deriv.q_pw().ld(),
463 v_tmp.at(sddk::memory_t::host), v_tmp.ld(), &la::constant<double>::zero(),
464 tmp.at(sddk::memory_t::host), tmp.ld());
465 PROFILE_STOP("sirius::Stress|us|gemm");
466
467 for (int ia = 0; ia < atom_type.num_atoms(); ia++) {
468 for (int i = 0; i < nbf * (nbf + 1) / 2; i++) {
469 stress_us_(mu, nu) += tmp(i, ia) * dm(i, ia, ispin) * q_deriv.sym_weight(i);
470 }
471 }
472 }
473 }
474 }
475 }
476
477 ctx_.comm().allreduce(&stress_us_(0, 0), 9);
478 if (ctx_.gvec().reduced()) {
479 stress_us_ *= 2;
480 }
481
482 stress_us_ *= (1.0 / ctx_.unit_cell().omega());
483
484 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_us_);
485
486 return stress_us_;
487}
488
491{
492 PROFILE("sirius::Stress|ewald");
493
494 stress_ewald_.zero();
495
496 double lambda = ctx_.ewald_lambda();
497
498 auto& uc = ctx_.unit_cell();
499
500 int ig0 = ctx_.gvec().skip_g0();
501 for (int igloc = ig0; igloc < ctx_.gvec().count(); igloc++) {
502 int ig = ctx_.gvec().offset() + igloc;
503
504 auto G = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);
505 double g2 = std::pow(G.length(), 2);
506 double g2lambda = g2 / 4.0 / lambda;
507
508 std::complex<double> rho(0, 0);
509
510 for (int ia = 0; ia < uc.num_atoms(); ia++) {
511 rho += ctx_.gvec_phase_factor(ig, ia) * static_cast<double>(uc.atom(ia).zn());
512 }
513
514 double a1 = twopi * std::pow(std::abs(rho) / uc.omega(), 2) * std::exp(-g2lambda) / g2;
515
516 for (int mu : {0, 1, 2}) {
517 for (int nu : {0, 1, 2}) {
518 stress_ewald_(mu, nu) += a1 * G[mu] * G[nu] * 2 * (g2lambda + 1) / g2;
519 }
520 }
521
522 for (int mu : {0, 1, 2}) {
523 stress_ewald_(mu, mu) -= a1;
524 }
525 }
526
527 if (ctx_.gvec().reduced()) {
528 stress_ewald_ *= 2;
529 }
530
531 ctx_.comm().allreduce(&stress_ewald_(0, 0), 9);
532
533 for (int mu : {0, 1, 2}) {
534 stress_ewald_(mu, mu) += twopi * std::pow(uc.num_electrons() / uc.omega(), 2) / 4 / lambda;
535 }
536
537 for (int ia = 0; ia < uc.num_atoms(); ia++) {
538 for (int i = 1; i < uc.num_nearest_neighbours(ia); i++) {
539 auto ja = uc.nearest_neighbour(i, ia).atom_id;
540 auto d = uc.nearest_neighbour(i, ia).distance;
541 auto rc = uc.nearest_neighbour(i, ia).rc;
542
543 double a1 = (0.5 * uc.atom(ia).zn() * uc.atom(ja).zn() / uc.omega() / std::pow(d, 3)) *
544 (-2 * std::exp(-lambda * std::pow(d, 2)) * std::sqrt(lambda / pi) * d -
545 std::erfc(std::sqrt(lambda) * d));
546
547 for (int mu : {0, 1, 2}) {
548 for (int nu : {0, 1, 2}) {
549 stress_ewald_(mu, nu) += a1 * rc[mu] * rc[nu];
550 }
551 }
552 }
553 }
554
555 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_ewald_);
556
557 return stress_ewald_;
558}
559
560void
561Stress::print_info(std::ostream& out__, int verbosity__) const
562{
563 auto print_stress = [&](std::string label__, r3::matrix<double> const& s) {
564 out__ << "=== " << label__ << " ===" << std::endl;
565 for (int mu : {0, 1, 2}) {
566 out__ << ffmt(12, 6) << s(mu, 0)
567 << ffmt(12, 6) << s(mu, 1)
568 << ffmt(12, 6) << s(mu, 2) << std::endl;
569 }
570 };
571
572 const double au2kbar = 2.94210119E5;
573 auto stress_kin = stress_kin_ * au2kbar;
574 auto stress_har = stress_har_ * au2kbar;
575 auto stress_ewald = stress_ewald_ * au2kbar;
576 auto stress_vloc = stress_vloc_ * au2kbar;
577 auto stress_xc = stress_xc_ * au2kbar;
578 auto stress_nonloc = stress_nonloc_ * au2kbar;
579 auto stress_us = stress_us_ * au2kbar;
580 auto stress_hubbard = stress_hubbard_ * au2kbar;
581 auto stress_core = stress_core_ * au2kbar;
582
583 out__ << "=== stress tensor components [kbar] ===" << std::endl;
584
585 print_stress("stress_kin", stress_kin);
586
587 print_stress("stress_har", stress_har);
588
589 print_stress("stress_ewald", stress_ewald);
590
591 print_stress("stress_vloc", stress_vloc);
592
593 print_stress("stress_xc", stress_xc);
594
595 print_stress("stress_core", stress_core);
596
597 print_stress("stress_nonloc", stress_nonloc);
598
599 print_stress("stress_us", stress_us);
600
601 stress_us = stress_us + stress_nonloc;
602 print_stress("stress_us_nl", stress_us);
603
604 if (ctx_.hubbard_correction()) {
605 print_stress("stress_hubbard", stress_hubbard);
606 }
607
608 auto stress_total = stress_total_ * au2kbar;
609 print_stress("stress_total", stress_total);
610}
611
612r3::matrix<double>
614{
615 PROFILE("sirius::Stress|har");
616
617 stress_har_.zero();
618
619 int ig0 = ctx_.gvec().skip_g0();
620 for (int igloc = ig0; igloc < ctx_.gvec().count(); igloc++) {
621 auto G = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);
622 double g2 = std::pow(G.length(), 2);
623 auto z = density_.rho().rg().f_pw_local(igloc);
624 double d = twopi * (std::pow(z.real(), 2) + std::pow(z.imag(), 2)) / g2;
625
626 for (int mu : {0, 1, 2}) {
627 for (int nu : {0, 1, 2}) {
628 stress_har_(mu, nu) += d * 2 * G[mu] * G[nu] / g2;
629 }
630 }
631 for (int mu : {0, 1, 2}) {
632 stress_har_(mu, mu) -= d;
633 }
634 }
635
636 if (ctx_.gvec().reduced()) {
637 stress_har_ *= 2;
638 }
639
640 ctx_.comm().allreduce(&stress_har_(0, 0), 9);
641
642 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_har_);
643
644 return stress_har_;
645}
646
647template <typename T>
648void
650{
651 stress_kin_.zero();
652
653 for (auto it : kset_.spl_num_kpoints()) {
654 auto kp = kset_.get<T>(it.i);
655
656 double fact = kp->gkvec().reduced() ? 2.0 : 1.0;
657 fact *= kp->weight();
658
659 #pragma omp parallel
660 {
662 for (int ispin = 0; ispin < ctx_.num_spins(); ispin++) {
663 #pragma omp for
664 for (int i = 0; i < kp->num_occupied_bands(ispin); i++) {
665 for (int igloc = 0; igloc < kp->num_gkvec_loc(); igloc++) {
666 auto Gk = kp->gkvec().template gkvec_cart<index_domain_t::local>(igloc);
667
668 double f = kp->band_occupancy(i, ispin);
669 auto z = kp->spinor_wave_functions().pw_coeffs(igloc, wf::spin_index(ispin), wf::band_index(i));
670 double d = fact * f * (std::pow(z.real(), 2) + std::pow(z.imag(), 2));
671 for (int mu : {0, 1, 2}) {
672 for (int nu : {0, 1, 2}) {
673 tmp(mu, nu) += Gk[mu] * Gk[nu] * d;
674 }
675 }
676 }
677 }
678 }
679 #pragma omp critical
680 stress_kin_ += tmp;
681 }
682 } // ikloc
683
684 ctx_.comm().allreduce(&stress_kin_(0, 0), 9);
685
686 stress_kin_ *= (-1.0 / ctx_.unit_cell().omega());
687
688 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_kin_);
689}
690
692Stress::calc_stress_kin()
693{
694 PROFILE("sirius::Stress|kin");
695 if (ctx_.cfg().parameters().precision_wf() == "fp32") {
696#if defined(SIRIUS_USE_FP32)
697 this->calc_stress_kin_aux<float>();
698#endif
699 } else {
700 this->calc_stress_kin_aux<double>();
701 }
702 return stress_kin_;
703}
704
705r3::matrix<double>
707{
708 PROFILE("sirius::Stress|vloc");
709
710 stress_vloc_.zero();
711
712 auto q = ctx_.gvec().shells_len();
713 auto const ri_vloc = ctx_.ri().vloc_->values(q, ctx_.comm());
714 auto const ri_vloc_dg = ctx_.ri().vloc_djl_->values(q, ctx_.comm());
715
716 auto v = make_periodic_function<index_domain_t::local>(ctx_.unit_cell(), ctx_.gvec(),
717 ctx_.phase_factors_t(), ri_vloc);
718 auto dv = make_periodic_function<index_domain_t::local>(ctx_.unit_cell(), ctx_.gvec(),
719 ctx_.phase_factors_t(), ri_vloc_dg);
720
721 double sdiag{0};
722
723 int ig0 = ctx_.gvec().skip_g0();
724 for (int igloc = ig0; igloc < ctx_.gvec().count(); igloc++) {
725
726 auto G = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);
727
728 for (int mu : {0, 1, 2}) {
729 for (int nu : {0, 1, 2}) {
730 stress_vloc_(mu, nu) +=
731 std::real(std::conj(density_.rho().rg().f_pw_local(igloc)) * dv[igloc]) * G[mu] * G[nu];
732 }
733 }
734
735 sdiag += std::real(std::conj(density_.rho().rg().f_pw_local(igloc)) * v[igloc]);
736 }
737
738 if (ctx_.gvec().reduced()) {
739 stress_vloc_ *= 2;
740 sdiag *= 2;
741 }
742 if (ctx_.comm().rank() == 0) {
743 sdiag += std::real(std::conj(density_.rho().rg().f_pw_local(0)) * v[0]);
744 }
745
746 for (int mu : {0, 1, 2}) {
747 stress_vloc_(mu, mu) -= sdiag;
748 }
749
750 ctx_.comm().allreduce(&stress_vloc_(0, 0), 9);
751
752 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_vloc_);
753
754 return stress_vloc_;
755}
756
758Stress::calc_stress_nonloc()
759{
760 if (ctx_.cfg().parameters().precision_wf() == "fp32") {
761#if defined(SIRIUS_USE_FP32)
762 if (ctx_.gamma_point()) {
763 calc_stress_nonloc_aux<float, float>();
764 } else {
765 calc_stress_nonloc_aux<float, std::complex<float>>();
766 }
767#else
768 RTE_THROW("Not compiled with FP32 support");
769#endif
770 } else {
771 if (ctx_.gamma_point()) {
772 calc_stress_nonloc_aux<double, double>();
773 } else {
774 calc_stress_nonloc_aux<double, std::complex<double>>();
775 }
776 }
777
778 return stress_nonloc_;
779}
780
781} // namespace sirius
Data and methods specific to the actual atom in the unit cell.
Definition: atom.hpp:37
Atom_type const & type() const
Return const reference to corresponding atom type object.
Definition: atom.hpp:318
Augmentation charge operator Q(r) of the ultrasoft pseudopotential formalism.
void generate_pw_coeffs_gvec_deriv(int nu__)
Generate G-vector derivative Q_{xi,xi'}(G)/dG of the plane-wave coefficients *‍/.
sddk::mdarray< double, 3 > density_matrix_aux(Atom_type const &atom_type__) const
Return density matrix for all atoms of a given type in auxiliary form.
Definition: density.cpp:1810
auto const & rho() const
Return const reference to charge density (scalar functions).
Definition: density.hpp:416
auto gvec_phase_factor(r3::vector< int > G__, int ia__) const
Phase factors .
auto const & gvec() const
Return const reference to Gvec object.
auto processing_unit_memory_t() const
Return the memory type for processing unit.
auto gvec_fft_sptr() const
Return shared pointer to Gvec_fft object.
std::ostream & out() const
Return output stream.
double ewald_lambda() const
Find the lambda parameter used in the Ewald summation.
mpi::Communicator const & comm() const
Total communicator of the simulation.
int num_spins() const
Number of spin components.
bool gamma_point(bool gamma_point__)
Set flag for Gamma-point calculation.
int num_mag_dims() const
Number of dimensions in the magnetization vector.
Representation of a smooth (Fourier-transformable) periodic function.
void zero()
Zero the values on the regular real-space grid and plane-wave coefficients.
r3::matrix< double > calc_stress_xc()
XC contribution to stress.
Definition: stress.cpp:284
r3::matrix< double > calc_stress_ewald()
Ewald energy contribution to stress.
Definition: stress.cpp:490
r3::matrix< double > calc_stress_us()
Contribution to the stress tensor from the augmentation operator.
Definition: stress.cpp:371
r3::matrix< double > calc_stress_vloc()
Local potential contribution to stress.
Definition: stress.cpp:706
r3::matrix< double > calc_stress_har()
Hartree energy contribution to stress.
Definition: stress.cpp:613
void calc_stress_kin_aux()
Kinetic energy contribution to stress.
Definition: stress.cpp:649
void calc_stress_nonloc_aux()
Non-local contribution to stress.
Definition: stress.cpp:37
r3::matrix< double > calc_stress_core()
Non-linear core correction to stress tensor.
Definition: stress.cpp:217
void gemm(char transa, char transb, ftn_int m, ftn_int n, ftn_int k, T const *alpha, T const *A, ftn_int lda, T const *B, ftn_int ldb, T const *beta, T *C, ftn_int ldc, acc::stream_id sid=acc::stream_id(-1)) const
General matrix-matrix multiplication.
MPI communicator wrapper.
void allreduce(T *buffer__, int count__) const
Perform the in-place (the output buffer is used as the input buffer) all-to-all reduction.
int rank() const
Rank of MPI process inside communicator.
Multidimensional array with the column-major (Fortran) order.
Definition: memory.hpp:660
void zero(memory_t mem__, size_t idx0__, size_t n__)
Zero n elements starting from idx0.
Definition: memory.hpp:1316
uint32_t ld() const
Return leading dimension size.
Definition: memory.hpp:1233
Total energy terms.
Contains definition of sirius::K_point class.
bool is_device_memory(memory_t mem__)
Check if this is a valid device memory (memory, accessible by the device).
Definition: memory.hpp:93
memory_t
Memory types where the code can store data.
Definition: memory.hpp:71
lib_t
Type of linear algebra backend library.
Definition: linalg_base.hpp:70
@ spla
SPLA library. Can take CPU and device pointers.
Namespace of the SIRIUS library.
Definition: sirius.f90:5
Smooth_periodic_vector_function< T > gradient(Smooth_periodic_function< T > &f__)
Gradient of the function in the plane-wave domain.
double energy_vxc(Density const &density, Potential const &potential)
Returns exchange correlation potential.
Definition: energy.cpp:76
const double twopi
Definition: constants.hpp:45
const double pi
Definition: constants.hpp:42
double energy_bxc(const Density &density, const Potential &potential)
TODO doc.
Definition: energy.cpp:94
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
Definition: math_tools.hpp:165
double energy_exc(Density const &density, Potential const &potential)
Returns exchange correlation energy.
Definition: energy.cpp:82
Common operation for forces and stress tensor.
A time-based profiler.
Simple classes and functions to work with vectors and matrices of the R^3 space.
Contains definition of sirius::Stress tensor class.
Symmetrize lattice stress tensor.