// Copyright (c) 2013-2019 Anton Kozhevnikov, Ilia Sivkov, Thomas Schulthess
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
// the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
// following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
// and the following disclaimer in the documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

/** \file force.cpp
 *
 *  \brief Contains implementation of sirius::Force class.
 */

#include "force.hpp"
#include "k_point/k_point.hpp"
#include "density/density.hpp"
#include "non_local_functor.hpp"

namespace sirius {

Force::Force(Simulation_context& ctx__, Density& density__, Potential& potential__, K_point_set& kset__)
    : ctx_(ctx__)
    , density_(density__)
    , potential_(potential__)
    , kset_(kset__)
{
}

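/** Compute the non-local (beta-projector) contribution to the forces for all k-points of this rank.
 *  The per-k-point work is dispatched on the wave-function precision \c T; at the Gamma point the
 *  matrix elements are real, so the real-valued specialization is used. Partial sums are reduced
 *  over the communicator and symmetrized at the end. */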
template <typename T>
void Force::calc_forces_nonloc_aux()
{
    forces_nonloc_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_nonloc_.zero();

    auto& spl_num_kp = kset_.spl_num_kpoints();

    for (auto it : spl_num_kp) {
        auto* kp = kset_.get<T>(it.i);

        if (ctx_.gamma_point()) {
            add_k_point_contribution<T, T>(*kp, forces_nonloc_);
        } else {
            add_k_point_contribution<T, std::complex<T>>(*kp, forces_nonloc_);
        }
    }

    ctx_.comm().allreduce(&forces_nonloc_(0, 0), 3 * ctx_.unit_cell().num_atoms());

    symmetrize_forces(ctx_.unit_cell(), forces_nonloc_);
}

sddk::mdarray<double, 2> const& Force::calc_forces_nonloc()
{
    PROFILE("sirius::Force::calc_forces_nonloc");

    if (ctx_.cfg().parameters().precision_wf() == "fp32") {
#if defined(SIRIUS_USE_FP32)
        this->calc_forces_nonloc_aux<float>();
#endif
    } else {
        this->calc_forces_nonloc_aux<double>();
    }
    return forces_nonloc_;
}

template <typename T, typename F>
void
Force::add_k_point_contribution(K_point<T>& kp__, sddk::mdarray<double, 2>& forces__) const
{
    /* if there are no beta projectors then return early */
    if (ctx_.unit_cell().max_mt_basis_size() == 0) {
        return;
    }

    Beta_projectors_gradient<T> bp_grad(ctx_, kp__.gkvec(), kp__.beta_projectors());
    auto mem = ctx_.processing_unit_memory_t();
    auto mg = kp__.spinor_wave_functions().memory_guard(mem, wf::copy_to::device);

    sddk::mdarray<real_type<F>, 2> f(3, ctx_.unit_cell().num_atoms());
    f.zero();

    add_k_point_contribution_nonlocal<T, F>(ctx_, bp_grad, kp__, f);

    for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
        for (int x : {0, 1, 2}) {
            forces__(x, ia) += f(x, ia);
        }
    }
}

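/** Build the band-occupancy weighted density matrix in the first-variational basis,
 *  \f[ D_{ij} = \sum_{n\sigma} f_{n\sigma}\, Z^{*}_{in\sigma} Z_{jn\sigma}, \f]
 *  where \f$ Z \f$ are the second-variational eigenvectors and \f$ f_{n\sigma} \f$ the band
 *  occupancies; in the non-collinear case the spinor blocks of the full eigenvector matrix are
 *  accumulated with the offset \f$ \sigma N_{\rm fv} \f$. Without second-variational states the
 *  matrix is simply diagonal with the occupancies. */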
void
Force::compute_dmat(K_point<double>* kp__, la::dmatrix<std::complex<double>>& dm__) const
{
    dm__.zero();

    /* trivial case */
    if (!ctx_.need_sv()) {
        for (int i = 0; i < ctx_.num_fv_states(); i++) {
            dm__.set(i, i, std::complex<double>(kp__->band_occupancy(i, 0), 0));
        }
    } else {
        if (ctx_.num_mag_dims() != 3) {
            la::dmatrix<std::complex<double>> ev1(ctx_.num_fv_states(), ctx_.num_fv_states(), ctx_.blacs_grid(),
                                                  ctx_.cyclic_block_size(), ctx_.cyclic_block_size());
            for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                auto& ev = kp__->sv_eigen_vectors(ispn);
                /* multiply second-variational eigen-vectors with band occupancies */
                for (int j = 0; j < ev.num_cols_local(); j++) {
                    /* up- or dn- band index */
                    int jb = ev.icol(j);
                    for (int i = 0; i < ev.num_rows_local(); i++) {
                        ev1(i, j) = std::conj(ev(i, j)) * kp__->band_occupancy(jb, ispn);
                    }
                }

                la::wrap(la::lib_t::scalapack)
                    .gemm('N', 'T', ctx_.num_fv_states(), ctx_.num_fv_states(), ctx_.num_bands(),
                          &la::constant<std::complex<double>>::one(), ev1, 0, 0, ev, 0, 0,
                          &la::constant<std::complex<double>>::one(), dm__, 0, 0);
            }
        } else {
            la::dmatrix<std::complex<double>> ev1(ctx_.num_bands(), ctx_.num_bands(), ctx_.blacs_grid(),
                                                  ctx_.cyclic_block_size(), ctx_.cyclic_block_size());
            auto& ev = kp__->sv_eigen_vectors(0);
            /* multiply second-variational eigen-vectors with band occupancies */
            for (int j = 0; j < ev.num_cols_local(); j++) {
                /* band index */
                int jb = ev.icol(j);
                for (int i = 0; i < ev.num_rows_local(); i++) {
                    ev1(i, j) = std::conj(ev(i, j)) * kp__->band_occupancy(jb, 0);
                }
            }
            for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                int offs = ispn * ctx_.num_fv_states();

                la::wrap(la::lib_t::scalapack)
                    .gemm('N', 'T', ctx_.num_fv_states(), ctx_.num_fv_states(), ctx_.num_bands(),
                          &la::constant<std::complex<double>>::one(), ev1, offs, 0, ev, offs, 0,
                          &la::constant<std::complex<double>>::one(), dm__, 0, 0);
            }
        }
    }
}

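/** Assemble the total force as the sum of the individual contributions: in the full-potential
 *  case IBS + Hellmann-Feynman + density-gradient terms, in the pseudopotential case
 *  local + ultrasoft + non-local + core + Ewald + SCF-correction + Hubbard terms.
 *
 *  A minimal usage sketch (assuming an initialized Simulation_context \c ctx, \c density,
 *  \c potential and \c kset; the variable names are illustrative):
 *  \code
 *  Force force(ctx, density, potential, kset);
 *  auto const& F = force.calc_forces_total(); // (3, num_atoms) array of forces in Ha/bohr
 *  force.print_info(std::cout, 2);
 *  \endcode
 */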
sddk::mdarray<double, 2> const&
Force::calc_forces_total()
{
    forces_total_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    if (ctx_.full_potential()) {
        calc_forces_rho();
        calc_forces_hf();
        calc_forces_ibs();
        for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
            for (int x : {0, 1, 2}) {
                forces_total_(x, ia) = forces_ibs_(x, ia) + forces_hf_(x, ia) + forces_rho_(x, ia);
            }
        }
    } else {
        calc_forces_vloc();
        calc_forces_us();
        calc_forces_nonloc();
        calc_forces_core();
        calc_forces_ewald();
        calc_forces_scf_corr();
        calc_forces_hubbard();

        for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
            for (int x : {0, 1, 2}) {
                forces_total_(x, ia) = forces_vloc_(x, ia) + forces_us_(x, ia) + forces_nonloc_(x, ia) +
                                       forces_core_(x, ia) + forces_ewald_(x, ia) + forces_scf_corr_(x, ia) +
                                       forces_hubbard_(x, ia);
            }
        }
    }
    return forces_total_;
}

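/** Incomplete-basis-set (Pulay) correction to the forces in the full-potential LAPW case.
 *  Form factors of the LAPW unit step function are tabulated for all G-shells, and each local
 *  k-point adds the contribution of the position derivatives of the Hamiltonian and overlap
 *  matrices via add_ibs_force(). */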
sddk::mdarray<double, 2> const&
Force::calc_forces_ibs()
{
    forces_ibs_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_ibs_.zero();

    sddk::mdarray<double, 2> ffac(ctx_.unit_cell().num_atom_types(), ctx_.gvec().num_shells());
    #pragma omp parallel for
    for (int igs = 0; igs < ctx_.gvec().num_shells(); igs++) {
        for (int iat = 0; iat < ctx_.unit_cell().num_atom_types(); iat++) {
            ffac(iat, igs) = unit_step_function_form_factors(ctx_.unit_cell().atom_type(iat).mt_radius(),
                                                             ctx_.gvec().shell_len(igs));
        }
    }

    Hamiltonian0<double> H0(potential_, false);
    for (auto it : kset_.spl_num_kpoints()) {
        auto hk = H0(*kset_.get<double>(it.i));
        add_ibs_force(kset_.get<double>(it.i), hk, ffac, forces_ibs_);
    }
    ctx_.comm().allreduce(&forces_ibs_(0, 0), (int)forces_ibs_.size());
    symmetrize_forces(ctx_.unit_cell(), forces_ibs_);

    return forces_ibs_;
}

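/** Full-potential contribution from the gradient of the muffin-tin density:
 *  \f[ \mathbf{F}_{\alpha} = \langle \nabla \rho^{\rm MT}_{\alpha} | V^{\rm MT}_{\rm eff,\alpha} \rangle, \f]
 *  evaluated as an inner product of spherical-function expansions inside each muffin-tin sphere. */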
sddk::mdarray<double, 2> const&
Force::calc_forces_rho()
{
    forces_rho_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_rho_.zero();
    for (auto it : ctx_.unit_cell().spl_num_atoms()) {
        auto g = gradient(density_.density_mt(it.li));
        for (int x = 0; x < 3; x++) {
            forces_rho_(x, it.i) = inner(potential_.effective_potential_mt(it.li), g[x]);
        }
    }
    ctx_.comm().allreduce(&forces_rho_(0, 0), (int)forces_rho_.size());
    symmetrize_forces(ctx_.unit_cell(), forces_rho_);

    return forces_rho_;
}

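/** Hellmann-Feynman force on the nucleus from the Hartree potential:
 *  \f[ \mathbf{F}_{\alpha} = Z_{\alpha} \nabla V_{H}(\mathbf{r}_{\alpha}), \f]
 *  where the gradient at the nucleus is taken from the \f$ \ell = 0 \f$ component of the
 *  muffin-tin expansion at the sphere origin (hence the factor \f$ y_{00} \f$ below). */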
sddk::mdarray<double, 2> const&
Force::calc_forces_hf()
{
    forces_hf_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_hf_.zero();

    for (auto it : ctx_.unit_cell().spl_num_atoms()) {
        auto g = gradient(potential_.hartree_potential_mt(it.li));
        for (int x = 0; x < 3; x++) {
            forces_hf_(x, it.i) = ctx_.unit_cell().atom(it.i).zn() * g[x](0, 0) * y00;
        }
    }
    ctx_.comm().allreduce(&forces_hf_(0, 0), (int)forces_hf_.size());
    symmetrize_forces(ctx_.unit_cell(), forces_hf_);

    return forces_hf_;
}

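/** Hubbard-correction contribution to the forces. The Hubbard potential is first regenerated
 *  from the current occupation matrix, then each local k-point adds its collinear contribution;
 *  only the simple (collinear) Hubbard correction is supported here. */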
sddk::mdarray<double, 2> const&
Force::calc_forces_hubbard()
{
    PROFILE("sirius::Force::hubbard_force");
    forces_hubbard_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_hubbard_.zero();

    if (ctx_.hubbard_correction()) {
        /* recompute the Hubbard potential */
        ::sirius::generate_potential(density_.occupation_matrix(), potential_.hubbard_potential());

        Q_operator<double> q_op(ctx_);

        for (auto it : kset_.spl_num_kpoints()) {

            auto kp = kset_.get<double>(it.i);
            // kp->beta_projectors().prepare();
            auto mg1 = kp->spinor_wave_functions().memory_guard(ctx_.processing_unit_memory_t(), wf::copy_to::device);
            auto mg2 = kp->hubbard_wave_functions_S().memory_guard(ctx_.processing_unit_memory_t(), wf::copy_to::device);
            auto mg3 = kp->atomic_wave_functions().memory_guard(ctx_.processing_unit_memory_t(), wf::copy_to::device);
            auto mg4 = kp->atomic_wave_functions_S().memory_guard(ctx_.processing_unit_memory_t(), wf::copy_to::device);

            if (ctx_.num_mag_dims() == 3) {
                RTE_THROW("Hubbard forces are only implemented for the simple Hubbard correction.");
            }
            hubbard_force_add_k_contribution_collinear(*kp, q_op, forces_hubbard_);
        }

        /* global reduction */
        kset_.comm().allreduce(forces_hubbard_.at(sddk::memory_t::host), 3 * ctx_.unit_cell().num_atoms());
    }

    symmetrize_forces(ctx_.unit_cell(), forces_hubbard_);
    return forces_hubbard_;
}

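/** Forces from the ion-ion (Ewald) interaction, split with the parameter \f$ \lambda \f$
 *  returned by ctx_.ewald_lambda(). The reciprocal-space part is
 *  \f[ \mathbf{F}_{\alpha} = \frac{4\pi Z_{\alpha}}{\Omega} \sum_{\mathbf{G} \neq 0} \mathbf{G}\,
 *      \frac{e^{-G^2/4\lambda}}{G^2}\, {\rm Im}\big[ \rho_{\rm ion}(\mathbf{G})\,
 *      e^{i\mathbf{G}\mathbf{r}_{\alpha}} \big], \qquad
 *      \rho_{\rm ion}(\mathbf{G}) = \sum_{\beta} Z_{\beta} e^{-i\mathbf{G}\mathbf{r}_{\beta}}, \f]
 *  and the short-range remainder is a real-space sum over nearest neighbours with erfc screening. */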
sddk::mdarray<double, 2> const&
Force::calc_forces_ewald()
{
    PROFILE("sirius::Force::calc_forces_ewald");

    forces_ewald_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_ewald_.zero();

    Unit_cell& unit_cell = ctx_.unit_cell();

    double alpha = ctx_.ewald_lambda();

    double prefac = (ctx_.gvec().reduced() ? 4.0 : 2.0) * (twopi / unit_cell.omega());

    int ig0 = ctx_.gvec().skip_g0();

    sddk::mdarray<std::complex<double>, 1> rho_tmp(ctx_.gvec().count());
    rho_tmp.zero();
    #pragma omp parallel for schedule(static)
    for (int igloc = ig0; igloc < ctx_.gvec().count(); igloc++) {
        int ig = ctx_.gvec().offset() + igloc;

        std::complex<double> rho(0, 0);

        for (int ja = 0; ja < unit_cell.num_atoms(); ja++) {
            rho += ctx_.gvec_phase_factor(ig, ja) * static_cast<double>(unit_cell.atom(ja).zn());
        }

        rho_tmp[igloc] = std::conj(rho);
    }

    #pragma omp parallel for
    for (int ja = 0; ja < unit_cell.num_atoms(); ja++) {
        for (int igloc = ig0; igloc < ctx_.gvec().count(); igloc++) {
            int ig = ctx_.gvec().offset() + igloc;

            double g2 = std::pow(ctx_.gvec().gvec_len<index_domain_t::local>(igloc), 2);

            /* cartesian form for getting cartesian force components */
            auto gvec_cart = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);

            double scalar_part = prefac * (rho_tmp[igloc] * ctx_.gvec_phase_factor(ig, ja)).imag() *
                                 static_cast<double>(unit_cell.atom(ja).zn()) * std::exp(-g2 / (4 * alpha)) / g2;

            for (int x : {0, 1, 2}) {
                forces_ewald_(x, ja) += scalar_part * gvec_cart[x];
            }
        }
    }

    ctx_.comm().allreduce(&forces_ewald_(0, 0), 3 * ctx_.unit_cell().num_atoms());

    double invpi = 1. / pi;

    #pragma omp parallel for
    for (int ia = 0; ia < unit_cell.num_atoms(); ia++) {
        for (int i = 1; i < unit_cell.num_nearest_neighbours(ia); i++) {
            int ja = unit_cell.nearest_neighbour(i, ia).atom_id;

            double d = unit_cell.nearest_neighbour(i, ia).distance;
            double d2 = d * d;

            auto t = dot(unit_cell.lattice_vectors(), r3::vector<int>(unit_cell.nearest_neighbour(i, ia).translation));

            double scalar_part =
                static_cast<double>(unit_cell.atom(ia).zn() * unit_cell.atom(ja).zn()) / d2 *
                (std::erfc(std::sqrt(alpha) * d) / d + 2.0 * std::sqrt(alpha * invpi) * std::exp(-d2 * alpha));

            for (int x : {0, 1, 2}) {
                forces_ewald_(x, ia) += scalar_part * t[x];
            }
        }
    }

    symmetrize_forces(ctx_.unit_cell(), forces_ewald_);

    return forces_ewald_;
}

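/** Ultrasoft augmentation contribution: each augmentation charge \f$ Q^{\alpha}_{\xi\xi'}(\mathbf{G}) \f$
 *  dragged with its atom contributes
 *  \f[ \mathbf{F}_{\alpha} = \Omega \sum_{\xi\xi'} n^{\alpha}_{\xi\xi'} \sum_{\mathbf{G}}
 *      i\mathbf{G}\, e^{-i\mathbf{G}\mathbf{r}_{\alpha}}\, Q^{\alpha}_{\xi\xi'}(\mathbf{G})\,
 *      V^{*}_{\rm eff}(\mathbf{G}), \f]
 *  with \f$ n^{\alpha}_{\xi\xi'} \f$ the density-matrix elements. The G-sum is evaluated as a real
 *  GEMM over unfolded real/imaginary parts; see the in-loop comment below for the sign convention. */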
sddk::mdarray<double, 2> const&
Force::calc_forces_us()
{
    PROFILE("sirius::Force::calc_forces_us");

    forces_us_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_us_.zero();

    potential_.fft_transform(-1);

    Unit_cell& unit_cell = ctx_.unit_cell();

    double reduce_g_fact = ctx_.gvec().reduced() ? 2.0 : 1.0;

    la::lib_t la{la::lib_t::none};

    sddk::memory_pool* mp{nullptr};
    switch (ctx_.processing_unit()) {
        case sddk::device_t::CPU: {
            mp = &get_memory_pool(sddk::memory_t::host);
            la = la::lib_t::blas;
            break;
        }
        case sddk::device_t::GPU: {
            mp = &get_memory_pool(sddk::memory_t::host_pinned);
            la = la::lib_t::spla;
            break;
        }
    }

    /* over atom types */
    for (int iat = 0; iat < unit_cell.num_atom_types(); iat++) {
        auto& atom_type = unit_cell.atom_type(iat);

        if (!ctx_.unit_cell().atom_type(iat).augment()) {
            continue;
        }

        auto& aug_op = ctx_.augmentation_op(iat);

        int nbf = atom_type.mt_basis_size();

        /* get auxiliary density matrix */
        auto dm = density_.density_matrix_aux(atom_type);

        sddk::mdarray<double, 2> v_tmp(atom_type.num_atoms(), ctx_.gvec().count() * 2, *mp);
        sddk::mdarray<double, 2> tmp(nbf * (nbf + 1) / 2, atom_type.num_atoms(), *mp);

        /* over spin components, can be from 1 to 4 */
        for (int ispin = 0; ispin < ctx_.num_mag_dims() + 1; ispin++) {
            /* over the 3 components of the force / G-vectors */
            for (int ivec = 0; ivec < 3; ivec++) {
                /* over G-vectors of the local rank */
                #pragma omp parallel for schedule(static)
                for (int igloc = 0; igloc < ctx_.gvec().count(); igloc++) {
                    int ig = ctx_.gvec().offset() + igloc;
                    auto gvc = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);
                    for (int ia = 0; ia < atom_type.num_atoms(); ia++) {
                        /* here we write to v_tmp: -i * G * exp[iGRn] * Veff(G),
                         * while the formula contains i * G * exp[-iGRn] * Veff*(G);
                         * the difference arises because the complex array is unfolded into a real
                         * one, which requires the negative imaginary part according to the
                         * multiplication rule for complex numbers */
                        auto z = std::complex<double>(0, -gvc[ivec]) * ctx_.gvec_phase_factor(ig, atom_type.atom_id(ia)) *
                                 potential_.component(ispin).rg().f_pw_local(igloc);
                        v_tmp(ia, 2 * igloc) = z.real();
                        v_tmp(ia, 2 * igloc + 1) = z.imag();
                    }
                }

                /* multiply tmp matrices, i.e. sum over G */
                la::wrap(la).gemm('N', 'T', nbf * (nbf + 1) / 2, atom_type.num_atoms(), 2 * ctx_.gvec().count(),
                                  &la::constant<double>::one(), aug_op.q_pw().at(sddk::memory_t::host), aug_op.q_pw().ld(),
                                  v_tmp.at(sddk::memory_t::host), v_tmp.ld(), &la::constant<double>::zero(),
                                  tmp.at(sddk::memory_t::host), tmp.ld());

                #pragma omp parallel for
                for (int ia = 0; ia < atom_type.num_atoms(); ia++) {
                    for (int i = 0; i < nbf * (nbf + 1) / 2; i++) {
                        forces_us_(ivec, atom_type.atom_id(ia)) += ctx_.unit_cell().omega() * reduce_g_fact *
                                                                   dm(i, ia, ispin) * aug_op.sym_weight(i) *
                                                                   tmp(i, ia);
                    }
                }
            }
        }
    }

    ctx_.comm().allreduce(&forces_us_(0, 0), 3 * ctx_.unit_cell().num_atoms());

    symmetrize_forces(ctx_.unit_cell(), forces_us_);

    return forces_us_;
}

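/** SCF-correction to the forces, which (as the name of potential_.dveff() suggests) accounts for
 *  the density not yet being self-consistent when the atoms move:
 *  \f[ \mathbf{F}_{\alpha} = -\sum_{\mathbf{G}} \mathbf{G}\, {\rm Im}\big[ 4\pi\,
 *      \tilde\rho_{\alpha}(G)\, \delta V^{*}_{\rm eff}(\mathbf{G})\,
 *      e^{-i\mathbf{G}\mathbf{r}_{\alpha}} \big], \f]
 *  where \f$ \tilde\rho_{\alpha} \f$ are the atomic pseudo-density form factors. */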
sddk::mdarray<double, 2> const&
Force::calc_forces_scf_corr()
{
    PROFILE("sirius::Force::calc_forces_scf_corr");

    auto q = ctx_.gvec().shells_len();
    /* get form-factors for all G shells */
    auto ff = ctx_.ri().ps_rho_->values(q, ctx_.comm());

    forces_scf_corr_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_scf_corr_.zero();

    /* get main arrays */
    auto& dveff = potential_.dveff();

    Unit_cell& unit_cell = ctx_.unit_cell();

    fft::Gvec const& gvec = ctx_.gvec();

    int gvec_count = gvec.count();
    int gvec_offset = gvec.offset();

    double fact = gvec.reduced() ? 2.0 : 1.0;

    int ig0 = ctx_.gvec().skip_g0();

    #pragma omp parallel for
    for (int ia = 0; ia < unit_cell.num_atoms(); ia++) {
        Atom& atom = unit_cell.atom(ia);

        int iat = atom.type_id();

        for (int igloc = ig0; igloc < gvec_count; igloc++) {
            int ig = gvec_offset + igloc;
            int igsh = ctx_.gvec().shell(ig);

            /* cartesian form for getting cartesian force components */
            auto gvec_cart = gvec.gvec_cart<index_domain_t::local>(igloc);

            /* scalar part of a force without multiplying by G-vector */
            std::complex<double> z =
                fact * fourpi * ff(igsh, iat) * std::conj(dveff.f_pw_local(igloc) * ctx_.gvec_phase_factor(ig, ia));

            /* get force components multiplying by cartesian G-vector */
            for (int x : {0, 1, 2}) {
                forces_scf_corr_(x, ia) -= (gvec_cart[x] * z).imag();
            }
        }
    }
    ctx_.comm().allreduce(&forces_scf_corr_(0, 0), 3 * ctx_.unit_cell().num_atoms());

    symmetrize_forces(ctx_.unit_cell(), forces_scf_corr_);

    return forces_scf_corr_;
}

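/** Force from the pseudo-core charge density interacting with the XC potential
 *  (non-linear core correction):
 *  \f[ \mathbf{F}_{\alpha} = -\sum_{\mathbf{G}} \mathbf{G}\, {\rm Im}\big[ 4\pi\,
 *      \rho^{\rm core}_{\alpha}(G)\, V^{*}_{\rm xc}(\mathbf{G})\,
 *      e^{-i\mathbf{G}\mathbf{r}_{\alpha}} \big]. \f]
 *  Atoms without a pseudo-core density are skipped. */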
sddk::mdarray<double, 2> const&
Force::calc_forces_core()
{
    PROFILE("sirius::Force::calc_forces_core");

    auto q = ctx_.gvec().shells_len();
    /* get form-factors for all G shells */
    auto ff = ctx_.ri().ps_core_->values(q, ctx_.comm());

    forces_core_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_core_.zero();

    /* get main arrays */
    auto& xc_pot = potential_.xc_potential();

    /* transform from real space to reciprocal */
    xc_pot.rg().fft_transform(-1);

    Unit_cell& unit_cell = ctx_.unit_cell();

    fft::Gvec const& gvecs = ctx_.gvec();

    int gvec_count = gvecs.count();
    int gvec_offset = gvecs.offset();

    double fact = gvecs.reduced() ? 2.0 : 1.0;

    /* the calculation is done in reciprocal space */
    #pragma omp parallel for
    for (int ia = 0; ia < unit_cell.num_atoms(); ia++) {
        Atom& atom = unit_cell.atom(ia);
        if (atom.type().ps_core_charge_density().empty()) {
            continue;
        }
        int iat = atom.type_id();

        for (int igloc = ctx_.gvec().skip_g0(); igloc < gvec_count; igloc++) {
            int ig = gvec_offset + igloc;
            auto igsh = ctx_.gvec().shell(ig);

            /* cartesian form for getting cartesian force components */
            auto gvec_cart = gvecs.gvec_cart<index_domain_t::local>(igloc);

            /* scalar part of a force without multiplying by G-vector */
            std::complex<double> z =
                fact * fourpi * ff(igsh, iat) * std::conj(xc_pot.rg().f_pw_local(igloc) * ctx_.gvec_phase_factor(ig, ia));

            /* get force components multiplying by cartesian G-vector */
            for (int x : {0, 1, 2}) {
                forces_core_(x, ia) -= (gvec_cart[x] * z).imag();
            }
        }
    }
    ctx_.comm().allreduce(&forces_core_(0, 0), 3 * ctx_.unit_cell().num_atoms());

    symmetrize_forces(ctx_.unit_cell(), forces_core_);

    return forces_core_;
}

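/** Add the k-point contribution to the Hubbard forces in the collinear case:
 *  \f[ \mathbf{F}_{\alpha} \mathrel{-}= {\rm Re} \sum_{m_1 m_2 \sigma}
 *      V^{U}_{m_1 m_2 \sigma}\, \frac{\partial n_{m_2 m_1 \sigma}}{\partial \mathbf{r}_{\alpha}}, \f]
 *  where the occupancy derivatives \c dn come from compute_occupancies_derivatives(), and both
 *  the local (on-site U) and the nonlocal (inter-site V) blocks of the Hubbard potential are
 *  accumulated. */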
void
Force::hubbard_force_add_k_contribution_collinear(K_point<double>& kp__, Q_operator<double>& q_op__,
                                                  sddk::mdarray<double, 2>& forceh__)
{
    auto r = ctx_.unit_cell().num_hubbard_wf();

    sddk::mdarray<std::complex<double>, 5> dn(r.first, r.first, ctx_.num_spins(), 3, ctx_.unit_cell().num_atoms());

    potential_.U().compute_occupancies_derivatives(kp__, q_op__, dn);

    #pragma omp parallel for
    for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
        /* compute the derivative of the occupancy numbers */
        for (int dir = 0; dir < 3; dir++) {
            std::complex<double> d{0.0};
            for (int at_lvl = 0; at_lvl < static_cast<int>(potential_.hubbard_potential().local().size()); at_lvl++) {
                const int ia1 = potential_.hubbard_potential().atomic_orbitals(at_lvl).first;
                const auto& atom = ctx_.unit_cell().atom(ia1);
                const int lo = potential_.hubbard_potential().atomic_orbitals(at_lvl).second;
                if (atom.type().lo_descriptor_hub(lo).use_for_calculation()) {
                    int const lmax_at = 2 * atom.type().lo_descriptor_hub(lo).l() + 1;
                    const int offset = potential_.hubbard_potential().offset(at_lvl);
                    for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                        for (int m2 = 0; m2 < lmax_at; m2++) {
                            for (int m1 = 0; m1 < lmax_at; m1++) {
                                d += potential_.hubbard_potential().local(at_lvl)(m1, m2, ispn) *
                                     dn(offset + m2, offset + m1, ispn, dir, ia);
                            }
                        }
                    }
                }
            }

            for (int i = 0; i < static_cast<int>(ctx_.cfg().hubbard().nonlocal().size()); i++) {
                auto nl = ctx_.cfg().hubbard().nonlocal(i);
                int ia1 = nl.atom_pair()[0];
                int ja = nl.atom_pair()[1];
                /* consider the links that involve this atom */
                int il = nl.l()[0];
                int jl = nl.l()[1];
                int in = nl.n()[0];
                int jn = nl.n()[1];
                auto Tr = nl.T();

                auto z1 = std::exp(std::complex<double>(0, -twopi * dot(r3::vector<int>(Tr), kp__.vk())));
                const int at_lvl1 = potential_.hubbard_potential().find_orbital_index(ia1, in, il);
                const int at_lvl2 = potential_.hubbard_potential().find_orbital_index(ja, jn, jl);
                const int offset1 = potential_.hubbard_potential().offset(at_lvl1);
                const int offset2 = potential_.hubbard_potential().offset(at_lvl2);
                for (int is = 0; is < ctx_.num_spins(); is++) {
                    for (int m2 = 0; m2 < 2 * jl + 1; m2++) {
                        for (int m1 = 0; m1 < 2 * il + 1; m1++) {
                            auto result1_ = z1 * std::conj(dn(offset2 + m2, offset1 + m1, is, dir, ia)) *
                                            potential_.hubbard_potential().nonlocal(i)(m1, m2, is);
                            d += std::real(result1_);
                        }
                    }
                }
            }
            forceh__(dir, ia) -= std::real(d);
        }
    }
}

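/** Force from the local part of the pseudopotential:
 *  \f[ \mathbf{F}_{\alpha} = -\sum_{\mathbf{G}} \mathbf{G}\, {\rm Im}\big[ 4\pi\,
 *      v^{\rm loc}_{\alpha}(G)\, \rho^{*}(\mathbf{G})\, e^{-i\mathbf{G}\mathbf{r}_{\alpha}} \big], \f]
 *  with \f$ v^{\rm loc}_{\alpha}(G) \f$ the radial-integral form factors of the local potential
 *  and \f$ \rho \f$ the valence charge density. */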
sddk::mdarray<double, 2> const&
Force::calc_forces_vloc()
{
    PROFILE("sirius::Force::calc_forces_vloc");

    auto q = ctx_.gvec().shells_len();
    /* get form-factors for all G shells */
    auto ff = ctx_.ri().vloc_->values(q, ctx_.comm());

    forces_vloc_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_vloc_.zero();

    auto& valence_rho = density_.rho();

    Unit_cell& unit_cell = ctx_.unit_cell();

    auto const& gvecs = ctx_.gvec();

    int gvec_count = gvecs.gvec_count(ctx_.comm().rank());
    int gvec_offset = gvecs.gvec_offset(ctx_.comm().rank());

    double fact = valence_rho.rg().gvec().reduced() ? 2.0 : 1.0;

    /* the calculation is done in reciprocal space */
    #pragma omp parallel for
    for (int ia = 0; ia < unit_cell.num_atoms(); ia++) {
        Atom& atom = unit_cell.atom(ia);

        int iat = atom.type_id();

        for (int igloc = 0; igloc < gvec_count; igloc++) {
            int ig = gvec_offset + igloc;
            int igsh = ctx_.gvec().shell(ig);

            /* cartesian form for getting cartesian force components */
            auto gvec_cart = gvecs.gvec_cart<index_domain_t::local>(igloc);

            /* scalar part of a force without multiplying by G-vector */
            std::complex<double> z = fact * fourpi * ff(igsh, iat) * std::conj(valence_rho.rg().f_pw_local(igloc)) *
                                     std::conj(ctx_.gvec_phase_factor(ig, ia));

            /* get force components multiplying by cartesian G-vector */
            for (int x : {0, 1, 2}) {
                forces_vloc_(x, ia) -= (gvec_cart[x] * z).imag();
            }
        }
    }

    ctx_.comm().allreduce(&forces_vloc_(0, 0), 3 * ctx_.unit_cell().num_atoms());

    symmetrize_forces(ctx_.unit_cell(), forces_vloc_);

    return forces_vloc_;
}

sddk::mdarray<double, 2> const&
Force::calc_forces_usnl()
{
    calc_forces_us();
    calc_forces_nonloc();

    forces_usnl_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
        for (int x : {0, 1, 2}) {
            forces_usnl_(x, ia) = forces_us_(x, ia) + forces_nonloc_(x, ia);
        }
    }

    return forces_usnl_;
}

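/** Per-k-point incomplete-basis-set (Pulay) force for LAPW. The position derivatives of the
 *  Hamiltonian and overlap matrices are obtained by multiplying the APW-APW blocks by
 *  \f$ i(\mathbf{G}-\mathbf{G}') \f$ and the APW-lo / lo-APW blocks by
 *  \f$ \pm i(\mathbf{G}+\mathbf{k}) \f$, and the force is accumulated as
 *  \f[ \mathbf{F}_{\alpha} \mathrel{+}= w_{\mathbf{k}}\, {\rm Re}\, {\rm Tr}\big[ D\,
 *      Z^{\dagger} (\nabla H - E \nabla O) Z \big], \f]
 *  with \f$ D \f$ the occupancy-weighted density matrix from compute_dmat(). */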
void
Force::add_ibs_force(K_point<double>* kp__, Hamiltonian_k<double>& Hk__, sddk::mdarray<double, 2>& ffac__,
                     sddk::mdarray<double, 2>& forcek__) const
{
    PROFILE("sirius::Force::ibs_force");

    auto& uc = ctx_.unit_cell();

    auto& bg = ctx_.blacs_grid();

    auto bs = ctx_.cyclic_block_size();

    auto nfv = ctx_.num_fv_states();

    auto ngklo = kp__->gklo_basis_size();

    /* compute density matrix for a k-point */
    la::dmatrix<std::complex<double>> dm(nfv, nfv, bg, bs, bs);
    compute_dmat(kp__, dm);

    /* first-variational eigen-vectors in scalapack distribution */
    auto& fv_evec = kp__->fv_eigen_vectors();

    la::dmatrix<std::complex<double>> h(ngklo, ngklo, bg, bs, bs);
    la::dmatrix<std::complex<double>> o(ngklo, ngklo, bg, bs, bs);

    la::dmatrix<std::complex<double>> h1(ngklo, ngklo, bg, bs, bs);
    la::dmatrix<std::complex<double>> o1(ngklo, ngklo, bg, bs, bs);

    la::dmatrix<std::complex<double>> zm1(ngklo, nfv, bg, bs, bs);
    la::dmatrix<std::complex<double>> zf(nfv, nfv, bg, bs, bs);

    sddk::mdarray<std::complex<double>, 2> alm_row(kp__->num_gkvec_row(), uc.max_mt_aw_basis_size());
    sddk::mdarray<std::complex<double>, 2> alm_col(kp__->num_gkvec_col(), uc.max_mt_aw_basis_size());
    sddk::mdarray<std::complex<double>, 2> halm_col(kp__->num_gkvec_col(), uc.max_mt_aw_basis_size());

    for (int ia = 0; ia < uc.num_atoms(); ia++) {
        h.zero();
        o.zero();

        auto& atom = uc.atom(ia);
        auto& type = atom.type();

        /* generate matching coefficients for current atom */
        kp__->alm_coeffs_row().generate<true>(atom, alm_row);
        kp__->alm_coeffs_col().generate<false>(atom, alm_col);

        /* setup apw-lo and lo-apw blocks */
        Hk__.set_fv_h_o_apw_lo(atom, ia, alm_row, alm_col, h, o);

        /* apply MT Hamiltonian to column coefficients */
        Hk__.H0().apply_hmt_to_apw<spin_block_t::nm>(atom, kp__->num_gkvec_col(), alm_col, halm_col);

        /* apw-apw block of the overlap matrix */
        la::wrap(la::lib_t::blas)
            .gemm('N', 'T', kp__->num_gkvec_row(), kp__->num_gkvec_col(), type.mt_aw_basis_size(),
                  &la::constant<std::complex<double>>::one(), alm_row.at(sddk::memory_t::host), alm_row.ld(),
                  alm_col.at(sddk::memory_t::host), alm_col.ld(), &la::constant<std::complex<double>>::zero(),
                  o.at(sddk::memory_t::host), o.ld());

        /* apw-apw block of the Hamiltonian matrix */
        la::wrap(la::lib_t::blas)
            .gemm('N', 'T', kp__->num_gkvec_row(), kp__->num_gkvec_col(), type.mt_aw_basis_size(),
                  &la::constant<std::complex<double>>::one(), alm_row.at(sddk::memory_t::host), alm_row.ld(),
                  halm_col.at(sddk::memory_t::host), halm_col.ld(), &la::constant<std::complex<double>>::zero(),
                  h.at(sddk::memory_t::host), h.ld());

        int iat = type.id();

        for (int igk_col = 0; igk_col < kp__->num_gkvec_col(); igk_col++) { // loop over columns
            auto gvec_col = kp__->gkvec_col().gvec<index_domain_t::local>(igk_col);
            auto gkvec_col_cart = kp__->gkvec_col().gkvec_cart<index_domain_t::local>(igk_col);
            for (int igk_row = 0; igk_row < kp__->num_gkvec_row(); igk_row++) { // for each column loop over rows
                auto gvec_row = kp__->gkvec_row().gvec<index_domain_t::local>(igk_row);
                auto gkvec_row_cart = kp__->gkvec_row().gkvec_cart<index_domain_t::local>(igk_row);

                int ig12 = ctx_.gvec().index_g12(gvec_row, gvec_col);

                int igs = ctx_.gvec().shell(ig12);

                auto zt = std::conj(ctx_.gvec_phase_factor(ig12, ia)) * ffac__(iat, igs) * fourpi / uc.omega();

                double t1 = 0.5 * dot(gkvec_row_cart, gkvec_col_cart);

                h(igk_row, igk_col) -= t1 * zt;
                o(igk_row, igk_col) -= zt;
            }
        }

        for (int x = 0; x < 3; x++) {
            for (int igk_col = 0; igk_col < kp__->num_gkvec_col(); igk_col++) { // loop over columns
                auto gvec_col = kp__->gkvec_col().gvec<index_domain_t::local>(igk_col);
                for (int igk_row = 0; igk_row < kp__->num_gkvec_row(); igk_row++) { // loop over rows
                    auto gvec_row = kp__->gkvec_row().gvec<index_domain_t::local>(igk_row);
                    /* compute index of G-G' */
                    int ig12 = ctx_.gvec().index_g12(gvec_row, gvec_col);
                    /* get G-G' */
                    auto vg = ctx_.gvec().gvec_cart<index_domain_t::global>(ig12);
                    /* multiply by i(G-G') */
                    h1(igk_row, igk_col) = std::complex<double>(0.0, vg[x]) * h(igk_row, igk_col);
                    /* multiply by i(G-G') */
                    o1(igk_row, igk_col) = std::complex<double>(0.0, vg[x]) * o(igk_row, igk_col);
                }
            }

            for (int icol = 0; icol < kp__->num_lo_col(); icol++) {
                for (int igk_row = 0; igk_row < kp__->num_gkvec_row(); igk_row++) {
                    auto gkvec_row_cart = kp__->gkvec_row().gkvec_cart<index_domain_t::local>(igk_row);
                    /* multiply by i(G+k) */
                    h1(igk_row, icol + kp__->num_gkvec_col()) =
                        std::complex<double>(0.0, gkvec_row_cart[x]) * h(igk_row, icol + kp__->num_gkvec_col());
                    /* multiply by i(G+k) */
                    o1(igk_row, icol + kp__->num_gkvec_col()) =
                        std::complex<double>(0.0, gkvec_row_cart[x]) * o(igk_row, icol + kp__->num_gkvec_col());
                }
            }

            for (int irow = 0; irow < kp__->num_lo_row(); irow++) {
                for (int igk_col = 0; igk_col < kp__->num_gkvec_col(); igk_col++) {
                    auto gkvec_col_cart = kp__->gkvec_col().gkvec_cart<index_domain_t::local>(igk_col);
                    /* multiply by -i(G+k) */
                    h1(irow + kp__->num_gkvec_row(), igk_col) =
                        std::complex<double>(0.0, -gkvec_col_cart[x]) * h(irow + kp__->num_gkvec_row(), igk_col);
                    /* multiply by -i(G+k) */
                    o1(irow + kp__->num_gkvec_row(), igk_col) =
                        std::complex<double>(0.0, -gkvec_col_cart[x]) * o(irow + kp__->num_gkvec_row(), igk_col);
                }
            }

            /* zm1 = dO * V */
            la::wrap(la::lib_t::scalapack)
                .gemm('N', 'N', ngklo, nfv, ngklo, &la::constant<std::complex<double>>::one(), o1, 0, 0, fv_evec, 0, 0,
                      &la::constant<std::complex<double>>::zero(), zm1, 0, 0);
            /* multiply by energy: zm1 = E * (dO * V) */
            for (int i = 0; i < zm1.num_cols_local(); i++) {
                int ist = zm1.icol(i);
                for (int j = 0; j < kp__->gklo_basis_size_row(); j++) {
                    zm1(j, i) *= kp__->fv_eigen_value(ist);
                }
            }
            /* compute zm1 = dH * V - E * (dO * V) */
            la::wrap(la::lib_t::scalapack)
                .gemm('N', 'N', ngklo, nfv, ngklo, &la::constant<std::complex<double>>::one(), h1, 0, 0, fv_evec, 0, 0,
                      &la::constant<std::complex<double>>::m_one(), zm1, 0, 0);

            /* compute zf = V^{+} * zm1 = V^{+} * (dH * V - E * (dO * V)) */
            la::wrap(la::lib_t::scalapack)
                .gemm('C', 'N', nfv, nfv, ngklo, &la::constant<std::complex<double>>::one(), fv_evec, 0, 0, zm1, 0, 0,
                      &la::constant<std::complex<double>>::zero(), zf, 0, 0);

            for (int i = 0; i < dm.num_cols_local(); i++) {
                for (int j = 0; j < dm.num_rows_local(); j++) {
                    forcek__(x, ia) += kp__->weight() * std::real(dm(j, i) * zf(j, i));
                }
            }
        }
    } // ia
}

void
Force::print_info(std::ostream& out__, int verbosity__)
{
    auto print_forces = [&](std::string label__, sddk::mdarray<double, 2> const& forces) {
        out__ << "==== " << label__ << " =====" << std::endl;
        for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
            out__ << "atom: " << std::setw(4) << ia << ", force: " << ffmt(15, 7) << forces(0, ia) <<
                ffmt(15, 7) << forces(1, ia) << ffmt(15, 7) << forces(2, ia) << std::endl;
        }
    };

    if (verbosity__ >= 1) {
        out__ << std::endl;
        print_forces("total forces in Ha/bohr", forces_total());
    }

    if (!ctx_.full_potential() && verbosity__ >= 2) {
        print_forces("ultrasoft contribution from Qij", forces_us());

        print_forces("non-local contribution from beta-projectors", forces_nonloc());

        print_forces("contribution from local potential", forces_vloc());

        print_forces("contribution from core density", forces_core());

        print_forces("Ewald forces from ions", forces_ewald());

        if (ctx_.hubbard_correction()) {
            print_forces("contribution from Hubbard correction", forces_hubbard());
        }
    }
}

template
void
Force::calc_forces_nonloc_aux<double>();
#if defined(SIRIUS_USE_FP32)
template
void
Force::calc_forces_nonloc_aux<float>();
#endif

} // namespace sirius