Force::Force(Simulation_context& ctx__, Density& density__, Potential& potential__, K_point_set& kset__)
    : ctx_(ctx__), density_(density__), potential_(potential__), kset_(kset__)
{
}
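/* Non-local contribution to the forces, accumulated over the k-points of this rank.
 * T is the wave-function precision (float or double); for a gamma-point calculation the
 * inner kernel is instantiated with real F = T, otherwise with F = std::complex<T>.
 * The partial sums are allreduced and symmetrized at the end. */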
template <typename T>
void Force::calc_forces_nonloc_aux()
{
    forces_nonloc_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_nonloc_.zero();

    auto& spl_num_kp = kset_.spl_num_kpoints();

    for (auto it : spl_num_kp) {
        auto* kp = kset_.get<T>(it.i);

        if (ctx_.gamma_point()) {
            add_k_point_contribution<T, T>(*kp, forces_nonloc_);
        } else {
            add_k_point_contribution<T, std::complex<T>>(*kp, forces_nonloc_);
        }
    }

    ctx_.comm().allreduce(&forces_nonloc_(0, 0), 3 * ctx_.unit_cell().num_atoms());

    symmetrize_forces(ctx_.unit_cell(), forces_nonloc_);
}
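/* Public entry point: dispatch to the FP32 or FP64 instantiation of the helper above,
 * based on the "precision_wf" runtime parameter. */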
sddk::mdarray<double, 2> const&
Force::calc_forces_nonloc()
{
    PROFILE("sirius::Force::calc_forces_nonloc");

    if (ctx_.cfg().parameters().precision_wf() == "fp32") {
#if defined(SIRIUS_USE_FP32)
        this->calc_forces_nonloc_aux<float>();
#else
        RTE_THROW("not compiled with FP32 support");
#endif
    } else {
        this->calc_forces_nonloc_aux<double>();
    }
    return forces_nonloc_;
}
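/* Per-k-point kernel for the non-local (beta-projector) force. Schematically (the exact
 * prefactor and sign conventions live in add_k_point_contribution_nonlocal):
 *   F_a += w_k sum_n f_{nk} Re[ <psi_nk|d beta_i/d r_a> D_{ij} <beta_j|psi_nk> ],
 * evaluated with the gradients of the beta-projectors. */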
template <typename T, typename F>
void
Force::add_k_point_contribution(K_point<T>& kp__, sddk::mdarray<double, 2>& forces__) const
{
    /* if there are no beta projectors then get out there */
    if (ctx_.unit_cell().max_mt_basis_size() == 0) {
        return;
    }

    Beta_projectors_gradient<T> bp_grad(ctx_, kp__.gkvec(), kp__.beta_projectors());
    auto mem = ctx_.processing_unit_memory_t();
    auto mg = kp__.spinor_wave_functions().memory_guard(mem, wf::copy_to::device);

    sddk::mdarray<real_type<F>, 2> f(3, ctx_.unit_cell().num_atoms());
    f.zero();

    add_k_point_contribution_nonlocal<T, F>(ctx_, bp_grad, kp__, f);

    for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
        for (int x : {0, 1, 2}) {
            forces__(x, ia) += f(x, ia);
        }
    }
}
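/* Build the k-point density matrix from band occupancies. Without the second-variational
 * step it is diagonal in the first-variational states; otherwise the second-variational
 * eigen-vectors are contracted with the occupancies via distributed gemm. */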
void
Force::compute_dmat(K_point<double>* kp__, la::dmatrix<std::complex<double>>& dm__) const
{
    dm__.zero();

    /* trivial case: no second variation, occupancies are on the diagonal */
    if (!ctx_.need_sv()) {
        for (int i = 0; i < ctx_.num_fv_states(); i++) {
            dm__.set(i, i, std::complex<double>(kp__->band_occupancy(i, 0), 0));
        }
    } else {
        if (ctx_.num_mag_dims() != 3) {
            la::dmatrix<std::complex<double>> ev1(ctx_.num_fv_states(), ctx_.num_fv_states(), ctx_.blacs_grid(),
                                                  ctx_.cyclic_block_size(), ctx_.cyclic_block_size());
            for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                auto& ev = kp__->sv_eigen_vectors(ispn);
                /* multiply second-variational eigen-vectors with band occupancies */
                for (int j = 0; j < ev.num_cols_local(); j++) {
                    /* up- or dn- band index */
                    int jb = ev.icol(j);
                    for (int i = 0; i < ev.num_rows_local(); i++) {
                        ev1(i, j) = std::conj(ev(i, j)) * kp__->band_occupancy(jb, ispn);
                    }
                }
                la::wrap(la::lib_t::scalapack)
                    .gemm('N', 'T', ctx_.num_fv_states(), ctx_.num_fv_states(), ctx_.num_bands(),
                          &la::constant<std::complex<double>>::one(), ev1, 0, 0, ev, 0, 0,
                          &la::constant<std::complex<double>>::one(), dm__, 0, 0);
            }
        } else {
            la::dmatrix<std::complex<double>> ev1(ctx_.num_bands(), ctx_.num_bands(), ctx_.blacs_grid(),
                                                  ctx_.cyclic_block_size(), ctx_.cyclic_block_size());
            auto& ev = kp__->sv_eigen_vectors(0);
            /* multiply second-variational eigen-vectors with band occupancies */
            for (int j = 0; j < ev.num_cols_local(); j++) {
                /* band index */
                int jb = ev.icol(j);
                for (int i = 0; i < ev.num_rows_local(); i++) {
                    ev1(i, j) = std::conj(ev(i, j)) * kp__->band_occupancy(jb, 0);
                }
            }
            for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                int offs = ispn * ctx_.num_fv_states();
                la::wrap(la::lib_t::scalapack)
                    .gemm('N', 'T', ctx_.num_fv_states(), ctx_.num_fv_states(), ctx_.num_bands(),
                          &la::constant<std::complex<double>>::one(), ev1, offs, 0, ev, offs, 0,
                          &la::constant<std::complex<double>>::one(), dm__, 0, 0);
            }
        }
    }
}
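/* Assemble the total force: in the full-potential (LAPW) case from the IBS, Hellmann-Feynman
 * and MT-density corrections; in the pseudopotential case from the local, ultrasoft,
 * non-local, core, Ewald, SCF-correction and Hubbard contributions. */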
sddk::mdarray<double, 2> const&
Force::calc_forces_total()
{
    if (ctx_.full_potential()) {
        calc_forces_ibs();
        calc_forces_hf();
        calc_forces_rho();

        forces_total_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
        for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
            for (int x : {0, 1, 2}) {
                forces_total_(x, ia) = forces_ibs_(x, ia) + forces_hf_(x, ia) + forces_rho_(x, ia);
            }
        }
    } else {
        calc_forces_vloc();
        calc_forces_us();
        calc_forces_nonloc();
        calc_forces_core();
        calc_forces_ewald();
        calc_forces_scf_corr();
        calc_forces_hubbard();

        forces_total_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
        for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
            for (int x : {0, 1, 2}) {
                forces_total_(x, ia) = forces_vloc_(x, ia) + forces_us_(x, ia) + forces_nonloc_(x, ia) +
                                       forces_core_(x, ia) + forces_ewald_(x, ia) + forces_scf_corr_(x, ia) +
                                       forces_hubbard_(x, ia);
            }
        }
    }
    return forces_total_;
}
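/* Incomplete-basis-set (Pulay-type) correction for the LAPW basis. The ffac array caches
 * the unit-step function form factors for each atom type and G-vector shell; the
 * per-k-point work is done in add_ibs_force(). */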
sddk::mdarray<double, 2> const&
Force::calc_forces_ibs()
{
    forces_ibs_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_ibs_.zero();

    sddk::mdarray<double, 2> ffac(ctx_.unit_cell().num_atom_types(), ctx_.gvec().num_shells());
    #pragma omp parallel for
    for (int igs = 0; igs < ctx_.gvec().num_shells(); igs++) {
        for (int iat = 0; iat < ctx_.unit_cell().num_atom_types(); iat++) {
            ffac(iat, igs) = unit_step_function_form_factors(ctx_.unit_cell().atom_type(iat).mt_radius(),
                                                             ctx_.gvec().shell_len(igs));
        }
    }

    Hamiltonian0<double> H0(potential_, false);
    for (auto it : kset_.spl_num_kpoints()) {
        auto hk = H0(*kset_.get<double>(it.i));
        add_ibs_force(kset_.get<double>(it.i), hk, ffac, forces_ibs_);
    }
    ctx_.comm().allreduce(&forces_ibs_(0, 0), (int)forces_ibs_.size());
    symmetrize_forces(ctx_.unit_cell(), forces_ibs_);

    return forces_ibs_;
}
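/* Full-potential correction: inner product of the effective potential with the gradient
 * of the muffin-tin charge density, one term per local atom. */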
sddk::mdarray<double, 2> const&
Force::calc_forces_rho()
{
    forces_rho_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_rho_.zero();
    for (auto it : ctx_.unit_cell().spl_num_atoms()) {
        auto g = gradient(density_.density_mt(it.li));
        for (int x = 0; x < 3; x++) {
            forces_rho_(x, it.i) = inner(potential_.effective_potential_mt(it.li), g[x]);
        }
    }
    ctx_.comm().allreduce(&forces_rho_(0, 0), (int)forces_rho_.size());
    symmetrize_forces(ctx_.unit_cell(), forces_rho_);

    return forces_rho_;
}
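/* Hellmann-Feynman contribution: the nuclear charge times the gradient of the Hartree
 * potential at the nucleus; g[x](0, 0) * y00 extracts the l = 0 component at the origin. */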
sddk::mdarray<double, 2> const&
Force::calc_forces_hf()
{
    forces_hf_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_hf_.zero();

    for (auto it : ctx_.unit_cell().spl_num_atoms()) {
        auto g = gradient(potential_.hartree_potential_mt(it.li));
        for (int x = 0; x < 3; x++) {
            forces_hf_(x, it.i) = ctx_.unit_cell().atom(it.i).zn() * g[x](0, 0) * y00;
        }
    }
    ctx_.comm().allreduce(&forces_hf_(0, 0), (int)forces_hf_.size());
    symmetrize_forces(ctx_.unit_cell(), forces_hf_);

    return forces_hf_;
}
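/* Hubbard contribution: regenerate the Hubbard potential from the current occupation
 * matrix and accumulate per-k-point derivatives (collinear case only). */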
sddk::mdarray<double, 2> const&
Force::calc_forces_hubbard()
{
    PROFILE("sirius::Force::hubbard_force");
    forces_hubbard_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_hubbard_.zero();

    if (ctx_.hubbard_correction()) {
        /* recompute the Hubbard potential */
        ::sirius::generate_potential(density_.occupation_matrix(), potential_.hubbard_potential());

        Q_operator<double> q_op(ctx_);

        for (auto it : kset_.spl_num_kpoints()) {
            auto kp = kset_.get<double>(it.i);

            auto mg1 = kp->spinor_wave_functions().memory_guard(ctx_.processing_unit_memory_t(), wf::copy_to::device);
            auto mg2 = kp->hubbard_wave_functions_S().memory_guard(ctx_.processing_unit_memory_t(), wf::copy_to::device);
            auto mg3 = kp->atomic_wave_functions().memory_guard(ctx_.processing_unit_memory_t(), wf::copy_to::device);
            auto mg4 = kp->atomic_wave_functions_S().memory_guard(ctx_.processing_unit_memory_t(), wf::copy_to::device);

            if (ctx_.num_mag_dims() == 3) {
                RTE_THROW("Hubbard forces are only implemented for the simple hubbard correction.");
            }
            hubbard_force_add_k_contribution_collinear(*kp, q_op, forces_hubbard_);
        }

        /* global reduction */
        kset_.comm().allreduce(forces_hubbard_.at(sddk::memory_t::host), 3 * ctx_.unit_cell().num_atoms());
    }

    symmetrize_forces(ctx_.unit_cell(), forces_hubbard_);
    return forces_hubbard_;
}
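/* Ewald contribution with splitting parameter alpha. Schematically (signs and prefactors
 * as implemented below):
 *   reciprocal part: F_a += prefac * Z_a * sum_{G != 0} G * exp(-G^2/(4 alpha)) / G^2 *
 *                           Im[ rho*(G) e^{i G r_a} ], with rho(G) = sum_j Z_j e^{i G r_j};
 *   real-space part: pairwise sum over nearest neighbours with
 *                    erfc(sqrt(alpha) d)/d + 2 sqrt(alpha/pi) exp(-alpha d^2), divided by d^2. */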
sddk::mdarray<double, 2> const&
Force::calc_forces_ewald()
{
    PROFILE("sirius::Force::calc_forces_ewald");

    forces_ewald_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_ewald_.zero();

    Unit_cell& unit_cell = ctx_.unit_cell();

    double alpha = ctx_.ewald_lambda();

    double prefac = (ctx_.gvec().reduced() ? 4.0 : 2.0) * (twopi / unit_cell.omega());

    int ig0 = ctx_.gvec().skip_g0();

    sddk::mdarray<std::complex<double>, 1> rho_tmp(ctx_.gvec().count());
    rho_tmp.zero();
    #pragma omp parallel for schedule(static)
    for (int igloc = ig0; igloc < ctx_.gvec().count(); igloc++) {
        int ig = ctx_.gvec().offset() + igloc;

        std::complex<double> rho(0, 0);

        for (int ja = 0; ja < unit_cell.num_atoms(); ja++) {
            rho += ctx_.gvec_phase_factor(ig, ja) * static_cast<double>(unit_cell.atom(ja).zn());
        }

        rho_tmp[igloc] = std::conj(rho);
    }

    #pragma omp parallel for
    for (int ja = 0; ja < unit_cell.num_atoms(); ja++) {
        for (int igloc = ig0; igloc < ctx_.gvec().count(); igloc++) {
            int ig = ctx_.gvec().offset() + igloc;

            double g2 = std::pow(ctx_.gvec().gvec_len<index_domain_t::local>(igloc), 2);

            /* cartesian form for getting cartesian force components */
            auto gvec_cart = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);

            double scalar_part = prefac * (rho_tmp[igloc] * ctx_.gvec_phase_factor(ig, ja)).imag() *
                                 static_cast<double>(unit_cell.atom(ja).zn()) * std::exp(-g2 / (4 * alpha)) / g2;

            for (int x : {0, 1, 2}) {
                forces_ewald_(x, ja) += scalar_part * gvec_cart[x];
            }
        }
    }

    ctx_.comm().allreduce(&forces_ewald_(0, 0), 3 * ctx_.unit_cell().num_atoms());

    double invpi = 1. / pi;

    #pragma omp parallel for
    for (int ia = 0; ia < unit_cell.num_atoms(); ia++) {
        for (int i = 1; i < unit_cell.num_nearest_neighbours(ia); i++) {
            int ja = unit_cell.nearest_neighbour(i, ia).atom_id;

            double d  = unit_cell.nearest_neighbour(i, ia).distance;
            double d2 = d * d;

            auto t = dot(unit_cell.lattice_vectors(), r3::vector<int>(unit_cell.nearest_neighbour(i, ia).translation));

            double scalar_part =
                static_cast<double>(unit_cell.atom(ia).zn() * unit_cell.atom(ja).zn()) / d2 *
                (std::erfc(std::sqrt(alpha) * d) / d + 2.0 * std::sqrt(alpha * invpi) * std::exp(-d2 * alpha));

            for (int x : {0, 1, 2}) {
                forces_ewald_(x, ia) += scalar_part * t[x];
            }
        }
    }

    symmetrize_forces(ctx_.unit_cell(), forces_ewald_);

    return forces_ewald_;
}
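/* Ultrasoft augmentation contribution. Schematically (exact sign and prefactor conventions
 * as implemented below):
 *   F_{a,x} ~ Omega * sum_G w_ij dm_ij Re[ i G_x Q_ij(G) e^{i G R_a} Veff*(G) ],
 * where the sum over G is cast as a real gemm over the unfolded (Re, Im) components. */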
sddk::mdarray<double, 2> const&
Force::calc_forces_us()
{
    PROFILE("sirius::Force::calc_forces_us");

    forces_us_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_us_.zero();

    /* transform potential components to reciprocal space */
    potential_.fft_transform(-1);

    Unit_cell& unit_cell = ctx_.unit_cell();

    double reduce_g_fact = ctx_.gvec().reduced() ? 2.0 : 1.0;

    la::lib_t la{la::lib_t::none};

    sddk::memory_pool* mp{nullptr};
    switch (ctx_.processing_unit()) {
        case sddk::device_t::CPU: {
            mp = &get_memory_pool(sddk::memory_t::host);
            la = la::lib_t::blas;
            break;
        }
        case sddk::device_t::GPU: {
            mp = &get_memory_pool(sddk::memory_t::host_pinned);
            la = la::lib_t::spla;
            break;
        }
    }

    /* over atom types */
    for (int iat = 0; iat < unit_cell.num_atom_types(); iat++) {
        auto& atom_type = unit_cell.atom_type(iat);

        if (!ctx_.unit_cell().atom_type(iat).augment()) {
            continue;
        }

        auto& aug_op = ctx_.augmentation_op(iat);

        int nbf = atom_type.mt_basis_size();

        /* auxiliary density matrix of the atom type */
        auto dm = density_.density_matrix_aux(atom_type);

        sddk::mdarray<double, 2> v_tmp(atom_type.num_atoms(), ctx_.gvec().count() * 2, *mp);
        sddk::mdarray<double, 2> tmp(nbf * (nbf + 1) / 2, atom_type.num_atoms(), *mp);

        /* over spin components, can be from 1 to 4 */
        for (int ispin = 0; ispin < ctx_.num_mag_dims() + 1; ispin++) {
            /* over 3 components of the force/G-vectors */
            for (int ivec = 0; ivec < 3; ivec++) {
                /* over local fraction of G-vectors */
                #pragma omp parallel for schedule(static)
                for (int igloc = 0; igloc < ctx_.gvec().count(); igloc++) {
                    int ig = ctx_.gvec().offset() + igloc;
                    auto gvc = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);
                    for (int ia = 0; ia < atom_type.num_atoms(); ia++) {
                        auto z = std::complex<double>(0, -gvc[ivec]) * ctx_.gvec_phase_factor(ig, atom_type.atom_id(ia)) *
                                 potential_.component(ispin).rg().f_pw_local(igloc);
                        v_tmp(ia, 2 * igloc)     = z.real();
                        v_tmp(ia, 2 * igloc + 1) = z.imag();
                    }
                }

                /* multiply tmp matrices, or sum over G */
                la::wrap(la).gemm('N', 'T', nbf * (nbf + 1) / 2, atom_type.num_atoms(), 2 * ctx_.gvec().count(),
                                  &la::constant<double>::one(), aug_op.q_pw().at(sddk::memory_t::host), aug_op.q_pw().ld(),
                                  v_tmp.at(sddk::memory_t::host), v_tmp.ld(), &la::constant<double>::zero(),
                                  tmp.at(sddk::memory_t::host), tmp.ld());

                #pragma omp parallel for
                for (int ia = 0; ia < atom_type.num_atoms(); ia++) {
                    for (int i = 0; i < nbf * (nbf + 1) / 2; i++) {
                        forces_us_(ivec, atom_type.atom_id(ia)) += ctx_.unit_cell().omega() * reduce_g_fact *
                                                                   dm(i, ia, ispin) * aug_op.sym_weight(i) *
                                                                   tmp(i, ia);
                    }
                }
            }
        }
    }

    ctx_.comm().allreduce(&forces_us_(0, 0), 3 * ctx_.unit_cell().num_atoms());

    symmetrize_forces(ctx_.unit_cell(), forces_us_);

    return forces_us_;
}
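/* SCF correction to the forces: accounts for the potential not being fully self-consistent
 * by folding the residual of the effective potential (dveff) with the atomic pseudo-density
 * form factors. */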
sddk::mdarray<double, 2> const&
Force::calc_forces_scf_corr()
{
    PROFILE("sirius::Force::calc_forces_scf_corr");

    auto q = ctx_.gvec().shells_len();
    /* form factors for all G shells */
    auto ff = ctx_.ri().ps_rho_->values(q, ctx_.comm());

    forces_scf_corr_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_scf_corr_.zero();

    /* get main arrays */
    auto& dveff = potential_.dveff();

    Unit_cell& unit_cell = ctx_.unit_cell();

    auto const& gvec = ctx_.gvec();

    int gvec_count  = gvec.count();
    int gvec_offset = gvec.offset();

    double fact = gvec.reduced() ? 2.0 : 1.0;

    int ig0 = ctx_.gvec().skip_g0();

    #pragma omp parallel for
    for (int ia = 0; ia < unit_cell.num_atoms(); ia++) {
        Atom& atom = unit_cell.atom(ia);

        int iat = atom.type_id();

        for (int igloc = ig0; igloc < gvec_count; igloc++) {
            int ig   = gvec_offset + igloc;
            int igsh = ctx_.gvec().shell(ig);

            /* cartesian form for getting cartesian force components */
            auto gvec_cart = gvec.gvec_cart<index_domain_t::local>(igloc);

            /* scalar part of a force without multiplying by G-vector */
            std::complex<double> z =
                fact * fourpi * ff(igsh, iat) * std::conj(dveff.f_pw_local(igloc) * ctx_.gvec_phase_factor(ig, ia));

            /* get force components multiplying by cartesian G-vector */
            for (int x : {0, 1, 2}) {
                forces_scf_corr_(x, ia) -= (gvec_cart[x] * z).imag();
            }
        }
    }
    ctx_.comm().allreduce(&forces_scf_corr_(0, 0), 3 * ctx_.unit_cell().num_atoms());

    symmetrize_forces(ctx_.unit_cell(), forces_scf_corr_);

    return forces_scf_corr_;
}
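/* Non-linear core correction contribution: core charge form factors folded with the
 * exchange-correlation potential in reciprocal space; atoms without a pseudo-core
 * density are skipped. */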
sddk::mdarray<double, 2> const&
Force::calc_forces_core()
{
    PROFILE("sirius::Force::calc_forces_core");

    auto q = ctx_.gvec().shells_len();
    /* form factors for all G shells */
    auto ff = ctx_.ri().ps_core_->values(q, ctx_.comm());

    forces_core_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_core_.zero();

    /* get main arrays */
    auto& xc_pot = potential_.xc_potential();

    /* transform from real space to reciprocal */
    xc_pot.rg().fft_transform(-1);

    Unit_cell& unit_cell = ctx_.unit_cell();

    auto const& gvecs = ctx_.gvec();

    int gvec_count  = gvecs.count();
    int gvec_offset = gvecs.offset();

    double fact = gvecs.reduced() ? 2.0 : 1.0;

    /* here the calculations are in lattice vectors space */
    #pragma omp parallel for
    for (int ia = 0; ia < unit_cell.num_atoms(); ia++) {
        Atom& atom = unit_cell.atom(ia);
        if (atom.type().ps_core_charge_density().empty()) {
            continue;
        }
        int iat = atom.type_id();

        for (int igloc = ctx_.gvec().skip_g0(); igloc < gvec_count; igloc++) {
            int ig    = gvec_offset + igloc;
            auto igsh = ctx_.gvec().shell(ig);

            /* cartesian form for getting cartesian force components */
            auto gvec_cart = gvecs.gvec_cart<index_domain_t::local>(igloc);

            /* scalar part of a force without multiplying by G-vector */
            std::complex<double> z =
                fact * fourpi * ff(igsh, iat) * std::conj(xc_pot.rg().f_pw_local(igloc) * ctx_.gvec_phase_factor(ig, ia));

            /* get force components multiplying by cartesian G-vector */
            for (int x : {0, 1, 2}) {
                forces_core_(x, ia) -= (gvec_cart[x] * z).imag();
            }
        }
    }
    ctx_.comm().allreduce(&forces_core_(0, 0), 3 * ctx_.unit_cell().num_atoms());

    symmetrize_forces(ctx_.unit_cell(), forces_core_);

    return forces_core_;
}
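/* k-point contribution to the Hubbard forces (collinear case): the derivative dn of the
 * occupation matrix with respect to atomic displacements is contracted with the local
 * (on-site) and non-local (inter-site) blocks of the Hubbard potential. */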
void
Force::hubbard_force_add_k_contribution_collinear(K_point<double>& kp__, Q_operator<double>& q_op__,
                                                  sddk::mdarray<double, 2>& forceh__)
{
    auto r = ctx_.unit_cell().num_hubbard_wf();

    /* derivative of the occupation matrix with respect to atomic displacements */
    sddk::mdarray<std::complex<double>, 5> dn(r.first, r.first, ctx_.num_spins(), 3, ctx_.unit_cell().num_atoms());

    potential_.U().compute_occupancies_derivatives(kp__, q_op__, dn);

    #pragma omp parallel for
    for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
        for (int dir = 0; dir < 3; dir++) {
            std::complex<double> d{0.0};
            /* local (on-site) part of the Hubbard energy */
            for (int at_lvl = 0; at_lvl < static_cast<int>(potential_.hubbard_potential().local().size()); at_lvl++) {
                const int ia1     = potential_.hubbard_potential().atomic_orbitals(at_lvl).first;
                const auto& atom  = ctx_.unit_cell().atom(ia1);
                const int lo      = potential_.hubbard_potential().atomic_orbitals(at_lvl).second;
                const int lmax_at = 2 * atom.type().lo_descriptor_hub(lo).l() + 1;
                const int offset  = potential_.hubbard_potential().offset(at_lvl);
                for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                    for (int m2 = 0; m2 < lmax_at; m2++) {
                        for (int m1 = 0; m1 < lmax_at; m1++) {
                            d += potential_.hubbard_potential().local(at_lvl)(m1, m2, ispn) *
                                 dn(offset + m2, offset + m1, ispn, dir, ia);
                        }
                    }
                }
            }

            /* inter-site (non-local) part of the Hubbard energy */
            for (int i = 0; i < static_cast<int>(ctx_.cfg().hubbard().nonlocal().size()); i++) {
                auto nl = ctx_.cfg().hubbard().nonlocal(i);
                int ia1 = nl.atom_pair()[0];
                int ja  = nl.atom_pair()[1];
                int il  = nl.l()[0];
                int jl  = nl.l()[1];
                int in  = nl.n()[0];
                int jn  = nl.n()[1];
                auto Tr = nl.T();

                auto z1 = std::exp(std::complex<double>(0, -twopi * dot(r3::vector<int>(Tr), kp__.vk())));
                const int at_lvl1 = potential_.hubbard_potential().find_orbital_index(ia1, in, il);
                const int at_lvl2 = potential_.hubbard_potential().find_orbital_index(ja, jn, jl);
                const int offset1 = potential_.hubbard_potential().offset(at_lvl1);
                const int offset2 = potential_.hubbard_potential().offset(at_lvl2);
                for (int is = 0; is < ctx_.num_spins(); is++) {
                    for (int m2 = 0; m2 < 2 * jl + 1; m2++) {
                        for (int m1 = 0; m1 < 2 * il + 1; m1++) {
                            auto result1_ = z1 * std::conj(dn(offset2 + m2, offset1 + m1, is, dir, ia)) *
                                            potential_.hubbard_potential().nonlocal(i)(m1, m2, is);
                            d += std::real(result1_);
                        }
                    }
                }
            }
            forceh__(dir, ia) -= std::real(d);
        }
    }
}
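/* Local pseudopotential contribution. Schematically:
 *   F_a -= sum_G Im[ G * vloc_a(|G|) rho*(G) e^{-i G r_a} ] (factor 2 for reduced G sets),
 * with the form factors vloc_a taken from the radial-integrals cache. */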
sddk::mdarray<double, 2> const&
Force::calc_forces_vloc()
{
    PROFILE("sirius::Force::calc_forces_vloc");

    auto q = ctx_.gvec().shells_len();
    auto ff = ctx_.ri().vloc_->values(q, ctx_.comm());

    forces_vloc_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    forces_vloc_.zero();

    auto& valence_rho = density_.rho();

    Unit_cell& unit_cell = ctx_.unit_cell();

    auto const& gvecs = ctx_.gvec();

    int gvec_count  = gvecs.gvec_count(ctx_.comm().rank());
    int gvec_offset = gvecs.gvec_offset(ctx_.comm().rank());

    double fact = valence_rho.rg().gvec().reduced() ? 2.0 : 1.0;

    /* here the calculations are in lattice vectors space */
    #pragma omp parallel for
    for (int ia = 0; ia < unit_cell.num_atoms(); ia++) {
        Atom& atom = unit_cell.atom(ia);

        int iat = atom.type_id();

        for (int igloc = 0; igloc < gvec_count; igloc++) {
            int ig   = gvec_offset + igloc;
            int igsh = ctx_.gvec().shell(ig);

            /* cartesian form for getting cartesian force components */
            auto gvec_cart = gvecs.gvec_cart<index_domain_t::local>(igloc);

            /* scalar part of a force without multiplying by G-vector */
            std::complex<double> z = fact * fourpi * ff(igsh, iat) * std::conj(valence_rho.rg().f_pw_local(igloc)) *
                                     std::conj(ctx_.gvec_phase_factor(ig, ia));

            /* get force components multiplying by cartesian G-vector */
            for (int x : {0, 1, 2}) {
                forces_vloc_(x, ia) -= (gvec_cart[x] * z).imag();
            }
        }
    }

    ctx_.comm().allreduce(&forces_vloc_(0, 0), 3 * ctx_.unit_cell().num_atoms());

    symmetrize_forces(ctx_.unit_cell(), forces_vloc_);

    return forces_vloc_;
}
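/* Convenience sum of the ultrasoft (Qij) and non-local (beta-projector) contributions. */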
sddk::mdarray<double, 2> const&
Force::calc_forces_usnl()
{
    calc_forces_us();
    calc_forces_nonloc();

    forces_usnl_ = sddk::mdarray<double, 2>(3, ctx_.unit_cell().num_atoms());
    for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
        for (int x : {0, 1, 2}) {
            forces_usnl_(x, ia) = forces_us_(x, ia) + forces_nonloc_(x, ia);
        }
    }
    return forces_usnl_;
}
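/* IBS force for a single k-point. Schematically, for each atom and direction:
 *   F_{a,x} += w_k sum_{ij} Re[ dm_ij (Z^H (dH/dr - eps dO/dr) Z)_ij ],
 * where the gradients of H and O are built from the iG-weighted apw-apw, apw-lo and
 * lo-apw blocks of the atom-resolved matrices. */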
void
Force::add_ibs_force(K_point<double>* kp__, Hamiltonian_k<double>& Hk__, sddk::mdarray<double, 2>& ffac__,
                     sddk::mdarray<double, 2>& forcek__) const
{
    PROFILE("sirius::Force::ibs_force");

    auto& uc = ctx_.unit_cell();

    auto& bg = ctx_.blacs_grid();

    auto bs = ctx_.cyclic_block_size();

    auto nfv = ctx_.num_fv_states();

    auto ngklo = kp__->gklo_basis_size();

    /* compute density matrix for a k-point */
    la::dmatrix<std::complex<double>> dm(nfv, nfv, bg, bs, bs);
    compute_dmat(kp__, dm);

    /* first-variational eigen-vectors in the scalapack distribution */
    auto& fv_evec = kp__->fv_eigen_vectors();

    la::dmatrix<std::complex<double>> h(ngklo, ngklo, bg, bs, bs);
    la::dmatrix<std::complex<double>> o(ngklo, ngklo, bg, bs, bs);

    la::dmatrix<std::complex<double>> h1(ngklo, ngklo, bg, bs, bs);
    la::dmatrix<std::complex<double>> o1(ngklo, ngklo, bg, bs, bs);

    la::dmatrix<std::complex<double>> zm1(ngklo, nfv, bg, bs, bs);
    la::dmatrix<std::complex<double>> zf(nfv, nfv, bg, bs, bs);

    sddk::mdarray<std::complex<double>, 2> alm_row(kp__->num_gkvec_row(), uc.max_mt_aw_basis_size());
    sddk::mdarray<std::complex<double>, 2> alm_col(kp__->num_gkvec_col(), uc.max_mt_aw_basis_size());
    sddk::mdarray<std::complex<double>, 2> halm_col(kp__->num_gkvec_col(), uc.max_mt_aw_basis_size());

    for (int ia = 0; ia < uc.num_atoms(); ia++) {
        h.zero();
        o.zero();

        auto& atom = uc.atom(ia);
        auto& type = atom.type();
        int iat = type.id();

        /* generate matching coefficients for a given atom */
        kp__->alm_coeffs_row().generate<true>(atom, alm_row);
        kp__->alm_coeffs_col().generate<false>(atom, alm_col);

        /* setup apw-lo and lo-apw blocks of the Hamiltonian and overlap */
        Hk__.set_fv_h_o_apw_lo(atom, ia, alm_row, alm_col, h, o);

        /* apply MT part of the Hamiltonian to the column matching coefficients */
        Hk__.H0().apply_hmt_to_apw<spin_block_t::nm>(atom, kp__->num_gkvec_col(), alm_col, halm_col);

        /* apw-apw block of the overlap matrix */
        la::wrap(la::lib_t::blas)
            .gemm('N', 'T', kp__->num_gkvec_row(), kp__->num_gkvec_col(), type.mt_aw_basis_size(),
                  &la::constant<std::complex<double>>::one(), alm_row.at(sddk::memory_t::host), alm_row.ld(),
                  alm_col.at(sddk::memory_t::host), alm_col.ld(), &la::constant<std::complex<double>>::zero(),
                  o.at(sddk::memory_t::host), o.ld());

        /* apw-apw block of the Hamiltonian matrix */
        la::wrap(la::lib_t::blas)
            .gemm('N', 'T', kp__->num_gkvec_row(), kp__->num_gkvec_col(), type.mt_aw_basis_size(),
                  &la::constant<std::complex<double>>::one(), alm_row.at(sddk::memory_t::host), alm_row.ld(),
                  halm_col.at(sddk::memory_t::host), halm_col.ld(), &la::constant<std::complex<double>>::zero(),
                  h.at(sddk::memory_t::host), h.ld());

        /* interstitial (unit-step function) corrections to the apw-apw blocks */
        for (int igk_col = 0; igk_col < kp__->num_gkvec_col(); igk_col++) {
            auto gvec_col       = kp__->gkvec_col().gvec<index_domain_t::local>(igk_col);
            auto gkvec_col_cart = kp__->gkvec_col().gkvec_cart<index_domain_t::local>(igk_col);
            for (int igk_row = 0; igk_row < kp__->num_gkvec_row(); igk_row++) {
                auto gvec_row       = kp__->gkvec_row().gvec<index_domain_t::local>(igk_row);
                auto gkvec_row_cart = kp__->gkvec_row().gkvec_cart<index_domain_t::local>(igk_row);

                int ig12 = ctx_.gvec().index_g12(gvec_row, gvec_col);

                int igs = ctx_.gvec().shell(ig12);

                auto zt = std::conj(ctx_.gvec_phase_factor(ig12, ia)) * ffac__(iat, igs) * fourpi / uc.omega();

                double t1 = 0.5 * dot(gkvec_row_cart, gkvec_col_cart);

                h(igk_row, igk_col) -= t1 * zt;
                o(igk_row, igk_col) -= zt;
            }
        }

        for (int x = 0; x < 3; x++) {
            /* apw-apw block of the iG-weighted gradient matrices */
            for (int igk_col = 0; igk_col < kp__->num_gkvec_col(); igk_col++) {
                auto gvec_col = kp__->gkvec_col().gvec<index_domain_t::local>(igk_col);
                for (int igk_row = 0; igk_row < kp__->num_gkvec_row(); igk_row++) {
                    auto gvec_row = kp__->gkvec_row().gvec<index_domain_t::local>(igk_row);

                    int ig12 = ctx_.gvec().index_g12(gvec_row, gvec_col);

                    auto vg = ctx_.gvec().gvec_cart<index_domain_t::global>(ig12);

                    h1(igk_row, igk_col) = std::complex<double>(0.0, vg[x]) * h(igk_row, igk_col);

                    o1(igk_row, igk_col) = std::complex<double>(0.0, vg[x]) * o(igk_row, igk_col);
                }
            }

            /* apw-lo block */
            for (int icol = 0; icol < kp__->num_lo_col(); icol++) {
                for (int igk_row = 0; igk_row < kp__->num_gkvec_row(); igk_row++) {
                    auto gkvec_row_cart = kp__->gkvec_row().gkvec_cart<index_domain_t::local>(igk_row);

                    h1(igk_row, icol + kp__->num_gkvec_col()) =
                        std::complex<double>(0.0, gkvec_row_cart[x]) * h(igk_row, icol + kp__->num_gkvec_col());

                    o1(igk_row, icol + kp__->num_gkvec_col()) =
                        std::complex<double>(0.0, gkvec_row_cart[x]) * o(igk_row, icol + kp__->num_gkvec_col());
                }
            }

            /* lo-apw block */
            for (int irow = 0; irow < kp__->num_lo_row(); irow++) {
                for (int igk_col = 0; igk_col < kp__->num_gkvec_col(); igk_col++) {
                    auto gkvec_col_cart = kp__->gkvec_col().gkvec_cart<index_domain_t::local>(igk_col);

                    h1(irow + kp__->num_gkvec_row(), igk_col) =
                        std::complex<double>(0.0, -gkvec_col_cart[x]) * h(irow + kp__->num_gkvec_row(), igk_col);

                    o1(irow + kp__->num_gkvec_row(), igk_col) =
                        std::complex<double>(0.0, -gkvec_col_cart[x]) * o(irow + kp__->num_gkvec_row(), igk_col);
                }
            }

            /* zm1 = dO * Z */
            la::wrap(la::lib_t::scalapack)
                .gemm('N', 'N', ngklo, nfv, ngklo, &la::constant<std::complex<double>>::one(), o1, 0, 0, fv_evec, 0, 0,
                      &la::constant<std::complex<double>>::zero(), zm1, 0, 0);

            /* multiply by eigen-values */
            for (int i = 0; i < zm1.num_cols_local(); i++) {
                int ist = zm1.icol(i);
                for (int j = 0; j < kp__->gklo_basis_size_row(); j++) {
                    zm1(j, i) *= kp__->fv_eigen_value(ist);
                }
            }

            /* zm1 = dH * Z - E * dO * Z */
            la::wrap(la::lib_t::scalapack)
                .gemm('N', 'N', ngklo, nfv, ngklo, &la::constant<std::complex<double>>::one(), h1, 0, 0, fv_evec, 0, 0,
                      &la::constant<std::complex<double>>::m_one(), zm1, 0, 0);

            /* zf = Z^{H} * (dH - E * dO) * Z */
            la::wrap(la::lib_t::scalapack)
                .gemm('C', 'N', nfv, nfv, ngklo, &la::constant<std::complex<double>>::one(), fv_evec, 0, 0, zm1, 0, 0,
                      &la::constant<std::complex<double>>::zero(), zf, 0, 0);

            for (int i = 0; i < dm.num_cols_local(); i++) {
                for (int j = 0; j < dm.num_rows_local(); j++) {
                    forcek__(x, ia) += kp__->weight() * std::real(dm(j, i) * zf(j, i));
                }
            }
        }
    }
}
void
Force::print_info(std::ostream& out__, int verbosity__)
{
    auto print_forces = [&](std::string label__, sddk::mdarray<double, 2> const& forces) {
        out__ << "==== " << label__ << " =====" << std::endl;
        for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
            out__ << "atom: " << std::setw(4) << ia << ", force: " << ffmt(15, 7) << forces(0, ia)
                  << ffmt(15, 7) << forces(1, ia) << ffmt(15, 7) << forces(2, ia) << std::endl;
        }
    };

    if (verbosity__ >= 1) {
        print_forces("total Forces in Ha/bohr", forces_total());
    }

    if (!ctx_.full_potential() && verbosity__ >= 2) {
        print_forces("ultrasoft contribution from Qij", forces_us());

        print_forces("non-local contribution from Beta-projector", forces_nonloc());

        print_forces("contribution from local potential", forces_vloc());

        print_forces("contribution from core density", forces_core());

        print_forces("Ewald forces from ions", forces_ewald());

        if (ctx_.hubbard_correction()) {
            print_forces("contribution from Hubbard correction", forces_hubbard());
        }
    }
}
template void
Force::calc_forces_nonloc_aux<double>();
#if defined(SIRIUS_USE_FP32)
template void
Force::calc_forces_nonloc_aux<float>();
#endif