35template <
typename T,
typename F>
39 PROFILE(
"sirius::Stress|nonloc");
42 collect_result.
zero();
44 stress_nonloc_.zero();
47 if (ctx_.unit_cell().max_mt_basis_size() == 0) {
51 print_memory_usage(ctx_.
out(), FILE_LINE);
53 for (
auto it : kset_.spl_num_kpoints()) {
54 auto kp = kset_.get<T>(it.i);
56 auto mg = kp->spinor_wave_functions().memory_guard(mem, wf::copy_to::device);
59 add_k_point_contribution_nonlocal<T, F>(ctx_, bp_strain_deriv, *kp, collect_result);
67 for (
int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
68 for (
int i = 0; i < 3; i++) {
69 for (
int j = 0; j < 3; j++) {
70 tmp_stress(i, j) -= collect_result(j * 3 + i, ia);
76 stress_nonloc_ += tmp_stress;
81 stress_nonloc_ *= (1.0 / ctx_.unit_cell().omega());
83 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_nonloc_);
87Stress::calc_stress_total()
97 stress_hubbard_.zero();
98 if (ctx_.hubbard_correction()) {
99 calc_stress_hubbard();
101 stress_total_.zero();
103 for (
int mu = 0; mu < 3; mu++) {
104 for (
int nu = 0; nu < 3; nu++) {
105 stress_total_(mu, nu) = stress_kin_(mu, nu) + stress_har_(mu, nu) + stress_ewald_(mu, nu) +
106 stress_vloc_(mu, nu) + stress_core_(mu, nu) + stress_xc_(mu, nu) +
107 stress_us_(mu, nu) + stress_nonloc_(mu, nu) + stress_hubbard_(mu, nu);
110 return stress_total_;
114Stress::calc_stress_hubbard()
116 stress_hubbard_.zero();
117 auto r = ctx_.unit_cell().num_hubbard_wf();
120 if (ctx_.unit_cell().max_mt_basis_size() == 0) {
121 RTE_THROW(
"Hubbard forces : Your pseudo potentials do not have beta projectors. This need a proper fix");
122 return stress_hubbard_;
125 Q_operator<double> q_op(ctx_);
127 auto nhwf = ctx_.unit_cell().num_hubbard_wf().first;
129 sddk::mdarray<std::complex<double>, 4> dn(nhwf, nhwf, 2, 9);
133 for (
auto it : kset_.spl_num_kpoints()) {
134 auto kp = kset_.get<
double>(it.i);
146 RTE_THROW(
"Hubbard stress correction is only implemented for the simple hubbard correction.");
150 potential_.U().compute_occupancies_stress_derivatives(*kp, q_op, dn);
151 for (
int dir1 = 0; dir1 < 3; dir1++) {
152 for (
int dir2 = 0; dir2 < 3; dir2++) {
153 for (
int at_lvl = 0; at_lvl < static_cast<int>(potential_.hubbard_potential().local().size());
155 const int ia1 = potential_.hubbard_potential().atomic_orbitals(at_lvl).first;
156 const auto& atom = ctx_.unit_cell().atom(ia1);
157 const int lo = potential_.hubbard_potential().atomic_orbitals(at_lvl).second;
158 if (atom.type().lo_descriptor_hub(lo).use_for_calculation()) {
159 const int lmax_at = 2 * atom.type().lo_descriptor_hub(lo).l() + 1;
160 const int offset = potential_.hubbard_potential().offset(at_lvl);
161 for (
int ispn = 0; ispn < ctx_.
num_spins(); ispn++) {
162 for (
int m1 = 0; m1 < lmax_at; m1++) {
163 for (
int m2 = 0; m2 < lmax_at; m2++) {
164 stress_hubbard_(dir1, dir2) -=
165 (potential_.hubbard_potential().local(at_lvl)(m2, m1, ispn) *
166 dn(offset + m1, offset + m2, ispn, dir1 + 3 * dir2))
168 ctx_.unit_cell().omega();
176 for (
int i = 0; i < ctx_.cfg().hubbard().nonlocal().size(); i++) {
177 auto nl = ctx_.cfg().hubbard().nonlocal(i);
178 int ia = nl.atom_pair()[0];
179 int ja = nl.atom_pair()[1];
186 auto z1 = std::exp(std::complex<double>(0, -
twopi * dot(r3::vector<int>(Tr), kp->vk())));
187 const int at_lvl1 = potential_.hubbard_potential().find_orbital_index(ia, in, il);
188 const int at_lvl2 = potential_.hubbard_potential().find_orbital_index(ja, jn, jl);
189 const int offset1 = potential_.hubbard_potential().offset(at_lvl1);
190 const int offset2 = potential_.hubbard_potential().offset(at_lvl2);
192 for (
int is = 0; is < ctx_.
num_spins(); is++) {
193 for (
int m2 = 0; m2 < 2 * jl + 1; m2++) {
194 for (
int m1 = 0; m1 < 2 * il + 1; m1++) {
195 auto result1_ = z1 *
std::conj(dn(offset2 + m2, offset1 + m1, is, dir1 + 3 * dir2)) *
196 potential_.hubbard_potential().nonlocal(i)(m1, m2, is);
197 d += std::real(result1_);
202 stress_hubbard_(dir1, dir2) -= d / ctx_.unit_cell().omega();
208 kset_.comm().allreduce(&stress_hubbard_(0, 0), 9);
209 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_hubbard_);
211 stress_hubbard_ = -1.0 * stress_hubbard_;
213 return stress_hubbard_;
226 for (
int ia = 0; (ia < ctx_.unit_cell().num_atoms()) && empty; ia++) {
227 Atom& atom = ctx_.unit_cell().atom(ia);
228 if (!atom.
type().ps_core_charge_density().empty()) {
237 potential_.xc_potential().rg().fft_transform(-1);
239 auto q = ctx_.
gvec().shells_len();
240 auto const ff = ctx_.ri().ps_core_djl_->values(q, ctx_.
comm());
241 auto drhoc = make_periodic_function<index_domain_t::local>(ctx_.unit_cell(), ctx_.
gvec(),
242 ctx_.phase_factors_t(), ff);
245 int ig0 = ctx_.
gvec().skip_g0();
247 for (
int igloc = ig0; igloc < ctx_.
gvec().count(); igloc++) {
251 for (
int mu : {0, 1, 2}) {
252 for (
int nu : {0, 1, 2}) {
253 stress_core_(mu, nu) -=
254 std::real(
std::conj(potential_.xc_potential().rg().f_pw_local(igloc)) * drhoc[igloc]) *
259 sdiag += std::real(
std::conj(potential_.xc_potential().rg().f_pw_local(igloc)) *
260 density_.rho_pseudo_core().f_pw_local(igloc));
263 if (ctx_.
gvec().reduced()) {
269 std::real(
std::conj(potential_.xc_potential().rg().f_pw_local(0)) * density_.rho_pseudo_core().f_pw_local(0));
272 for (
int mu : {0, 1, 2}) {
273 stress_core_(mu, mu) -= sdiag;
278 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_core_);
291 for (
int l = 0; l < 3; l++) {
292 stress_xc_(l, l) = e / ctx_.unit_cell().omega();
295 if (potential_.is_gradient_correction()) {
305 rhovc += density_.
rho().rg();
306 rhovc += density_.rho_pseudo_core();
309 rhovc.fft_transform(-1);
315 for (
int x : {0, 1, 2}) {
316 grad_rho[x].fft_transform(1);
319 for (
int irloc = 0; irloc < ctx_.spfft<
double>().local_slice_size(); irloc++) {
320 for (
int mu = 0; mu < 3; mu++) {
321 for (
int nu = 0; nu < 3; nu++) {
323 2 * grad_rho[mu].value(irloc) * grad_rho[nu].value(irloc) * potential_.vsigma(0).value(irloc);
328 auto result = get_rho_up_dn<true>(density_);
329 auto& rho_up = *result[0];
330 auto& rho_dn = *result[1];
333 rho_up.fft_transform(-1);
334 rho_dn.fft_transform(-1);
337 auto grad_rho_up =
gradient(rho_up);
338 auto grad_rho_dn =
gradient(rho_dn);
341 for (
int x : {0, 1, 2}) {
342 grad_rho_up[x].fft_transform(1);
343 grad_rho_dn[x].fft_transform(1);
346 for (
int irloc = 0; irloc < ctx_.spfft<
double>().local_slice_size(); irloc++) {
347 for (
int mu = 0; mu < 3; mu++) {
348 for (
int nu = 0; nu < 3; nu++) {
349 t(mu, nu) += grad_rho_up[mu].value(irloc) * grad_rho_up[nu].value(irloc) * 2 *
350 potential_.vsigma(0).value(irloc) +
351 (grad_rho_up[mu].value(irloc) * grad_rho_dn[nu].value(irloc) +
352 grad_rho_dn[mu].value(irloc) * grad_rho_up[nu].value(irloc)) *
353 potential_.vsigma(1).value(irloc) +
354 grad_rho_dn[mu].value(irloc) * grad_rho_dn[nu].value(irloc) * 2 *
355 potential_.vsigma(2).value(irloc);
361 t *= (-1.0 / ctx_.fft_grid().num_points());
365 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_xc_);
373 PROFILE(
"sirius::Stress|us");
378 if (ctx_.unit_cell().max_mt_basis_size() == 0) {
382 potential_.fft_transform(-1);
388 switch (ctx_.processing_unit()) {
389 case sddk::device_t::CPU: {
390 mp = &get_memory_pool(sddk::memory_t::host);
392 qmem = sddk::memory_t::host;
395 case sddk::device_t::GPU: {
396 mp = &get_memory_pool(sddk::memory_t::host_pinned);
398 qmem = sddk::memory_t::host;
403 for (
int iat = 0; iat < ctx_.unit_cell().num_atom_types(); iat++) {
404 auto& atom_type = ctx_.unit_cell().atom_type(iat);
405 if (!atom_type.augment() || atom_type.num_atoms() == 0) {
410 *ctx_.ri().aug_djl_);
412 auto nbf = atom_type.mt_basis_size();
418 get_memory_pool(sddk::memory_t::host));
420 PROFILE_START(
"sirius::Stress|us|phase_fac");
421 #pragma omp parallel for
422 for (
int igloc = 0; igloc < ctx_.
gvec().count(); igloc++) {
423 int ig = ctx_.
gvec().offset() + igloc;
424 for (
int i = 0; i < atom_type.num_atoms(); i++) {
428 PROFILE_STOP(
"sirius::Stress|us|phase_fac");
433 for (
int nu = 0; nu < 3; nu++) {
436 for (
int ispin = 0; ispin < ctx_.
num_mag_dims() + 1; ispin++) {
437 for (
int mu = 0; mu < 3; mu++) {
438 PROFILE_START(
"sirius::Stress|us|prepare");
441 for (
int ia = 0; ia < atom_type.num_atoms(); ia++) {
442 v_tmp(ia, 0) = v_tmp(ia, 1) = 0;
446 #pragma omp parallel for
447 for (
int igloc = igloc0; igloc < ctx_.
gvec().count(); igloc++) {
449 double g = gvc.length();
451 for (
int ia = 0; ia < atom_type.num_atoms(); ia++) {
452 auto z = phase_factors(ia, igloc) * potential_.component(ispin).rg().f_pw_local(igloc) *
454 v_tmp(ia, 2 * igloc) = z.real();
455 v_tmp(ia, 2 * igloc + 1) = z.imag();
458 PROFILE_STOP(
"sirius::Stress|us|prepare");
460 PROFILE_START(
"sirius::Stress|us|gemm");
461 la::wrap(la).
gemm(
'N',
'T', nbf * (nbf + 1) / 2, atom_type.num_atoms(), 2 * ctx_.
gvec().count(),
464 tmp.at(sddk::memory_t::host), tmp.
ld());
465 PROFILE_STOP(
"sirius::Stress|us|gemm");
467 for (
int ia = 0; ia < atom_type.num_atoms(); ia++) {
468 for (
int i = 0; i < nbf * (nbf + 1) / 2; i++) {
469 stress_us_(mu, nu) += tmp(i, ia) * dm(i, ia, ispin) * q_deriv.sym_weight(i);
478 if (ctx_.
gvec().reduced()) {
482 stress_us_ *= (1.0 / ctx_.unit_cell().omega());
484 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_us_);
492 PROFILE(
"sirius::Stress|ewald");
494 stress_ewald_.zero();
498 auto& uc = ctx_.unit_cell();
500 int ig0 = ctx_.
gvec().skip_g0();
501 for (
int igloc = ig0; igloc < ctx_.
gvec().count(); igloc++) {
502 int ig = ctx_.
gvec().offset() + igloc;
505 double g2 = std::pow(G.length(), 2);
506 double g2lambda = g2 / 4.0 / lambda;
508 std::complex<double> rho(0, 0);
510 for (
int ia = 0; ia < uc.num_atoms(); ia++) {
514 double a1 =
twopi * std::pow(std::abs(rho) / uc.omega(), 2) * std::exp(-g2lambda) / g2;
516 for (
int mu : {0, 1, 2}) {
517 for (
int nu : {0, 1, 2}) {
518 stress_ewald_(mu, nu) += a1 * G[mu] * G[nu] * 2 * (g2lambda + 1) / g2;
522 for (
int mu : {0, 1, 2}) {
523 stress_ewald_(mu, mu) -= a1;
527 if (ctx_.
gvec().reduced()) {
533 for (
int mu : {0, 1, 2}) {
534 stress_ewald_(mu, mu) +=
twopi * std::pow(uc.num_electrons() / uc.omega(), 2) / 4 / lambda;
537 for (
int ia = 0; ia < uc.num_atoms(); ia++) {
538 for (
int i = 1; i < uc.num_nearest_neighbours(ia); i++) {
539 auto ja = uc.nearest_neighbour(i, ia).atom_id;
540 auto d = uc.nearest_neighbour(i, ia).distance;
541 auto rc = uc.nearest_neighbour(i, ia).rc;
543 double a1 = (0.5 * uc.atom(ia).zn() * uc.atom(ja).zn() / uc.omega() / std::pow(d, 3)) *
544 (-2 * std::exp(-lambda * std::pow(d, 2)) * std::sqrt(lambda /
pi) * d -
545 std::erfc(std::sqrt(lambda) * d));
547 for (
int mu : {0, 1, 2}) {
548 for (
int nu : {0, 1, 2}) {
549 stress_ewald_(mu, nu) += a1 * rc[mu] * rc[nu];
555 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_ewald_);
557 return stress_ewald_;
561Stress::print_info(std::ostream& out__,
int verbosity__)
const
564 out__ <<
"=== " << label__ <<
" ===" << std::endl;
565 for (
int mu : {0, 1, 2}) {
566 out__ << ffmt(12, 6) << s(mu, 0)
567 << ffmt(12, 6) << s(mu, 1)
568 << ffmt(12, 6) << s(mu, 2) << std::endl;
572 const double au2kbar = 2.94210119E5;
573 auto stress_kin = stress_kin_ * au2kbar;
574 auto stress_har = stress_har_ * au2kbar;
575 auto stress_ewald = stress_ewald_ * au2kbar;
576 auto stress_vloc = stress_vloc_ * au2kbar;
577 auto stress_xc = stress_xc_ * au2kbar;
578 auto stress_nonloc = stress_nonloc_ * au2kbar;
579 auto stress_us = stress_us_ * au2kbar;
580 auto stress_hubbard = stress_hubbard_ * au2kbar;
581 auto stress_core = stress_core_ * au2kbar;
583 out__ <<
"=== stress tensor components [kbar] ===" << std::endl;
585 print_stress(
"stress_kin", stress_kin);
587 print_stress(
"stress_har", stress_har);
589 print_stress(
"stress_ewald", stress_ewald);
591 print_stress(
"stress_vloc", stress_vloc);
593 print_stress(
"stress_xc", stress_xc);
595 print_stress(
"stress_core", stress_core);
597 print_stress(
"stress_nonloc", stress_nonloc);
599 print_stress(
"stress_us", stress_us);
601 stress_us = stress_us + stress_nonloc;
602 print_stress(
"stress_us_nl", stress_us);
604 if (ctx_.hubbard_correction()) {
605 print_stress(
"stress_hubbard", stress_hubbard);
608 auto stress_total = stress_total_ * au2kbar;
609 print_stress(
"stress_total", stress_total);
615 PROFILE(
"sirius::Stress|har");
619 int ig0 = ctx_.
gvec().skip_g0();
620 for (
int igloc = ig0; igloc < ctx_.
gvec().count(); igloc++) {
622 double g2 = std::pow(G.length(), 2);
623 auto z = density_.
rho().rg().f_pw_local(igloc);
624 double d =
twopi * (std::pow(z.real(), 2) + std::pow(z.imag(), 2)) / g2;
626 for (
int mu : {0, 1, 2}) {
627 for (
int nu : {0, 1, 2}) {
628 stress_har_(mu, nu) += d * 2 * G[mu] * G[nu] / g2;
631 for (
int mu : {0, 1, 2}) {
632 stress_har_(mu, mu) -= d;
636 if (ctx_.
gvec().reduced()) {
642 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_har_);
653 for (
auto it : kset_.spl_num_kpoints()) {
654 auto kp = kset_.get<T>(it.i);
656 double fact = kp->gkvec().reduced() ? 2.0 : 1.0;
657 fact *= kp->weight();
662 for (
int ispin = 0; ispin < ctx_.
num_spins(); ispin++) {
664 for (
int i = 0; i < kp->num_occupied_bands(ispin); i++) {
665 for (
int igloc = 0; igloc < kp->num_gkvec_loc(); igloc++) {
666 auto Gk = kp->gkvec().template gkvec_cart<index_domain_t::local>(igloc);
668 double f = kp->band_occupancy(i, ispin);
670 double d = fact * f * (std::pow(z.real(), 2) + std::pow(z.imag(), 2));
671 for (
int mu : {0, 1, 2}) {
672 for (
int nu : {0, 1, 2}) {
673 tmp(mu, nu) += Gk[mu] * Gk[nu] * d;
686 stress_kin_ *= (-1.0 / ctx_.unit_cell().omega());
688 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_kin_);
692Stress::calc_stress_kin()
694 PROFILE(
"sirius::Stress|kin");
695 if (ctx_.cfg().parameters().precision_wf() ==
"fp32") {
696#if defined(SIRIUS_USE_FP32)
697 this->calc_stress_kin_aux<float>();
700 this->calc_stress_kin_aux<double>();
708 PROFILE(
"sirius::Stress|vloc");
712 auto q = ctx_.
gvec().shells_len();
713 auto const ri_vloc = ctx_.ri().vloc_->values(q, ctx_.
comm());
714 auto const ri_vloc_dg = ctx_.ri().vloc_djl_->values(q, ctx_.
comm());
716 auto v = make_periodic_function<index_domain_t::local>(ctx_.unit_cell(), ctx_.
gvec(),
717 ctx_.phase_factors_t(), ri_vloc);
718 auto dv = make_periodic_function<index_domain_t::local>(ctx_.unit_cell(), ctx_.
gvec(),
719 ctx_.phase_factors_t(), ri_vloc_dg);
723 int ig0 = ctx_.
gvec().skip_g0();
724 for (
int igloc = ig0; igloc < ctx_.
gvec().count(); igloc++) {
728 for (
int mu : {0, 1, 2}) {
729 for (
int nu : {0, 1, 2}) {
730 stress_vloc_(mu, nu) +=
731 std::real(
std::conj(density_.
rho().rg().f_pw_local(igloc)) * dv[igloc]) * G[mu] * G[nu];
735 sdiag += std::real(
std::conj(density_.
rho().rg().f_pw_local(igloc)) * v[igloc]);
738 if (ctx_.
gvec().reduced()) {
743 sdiag += std::real(
std::conj(density_.
rho().rg().f_pw_local(0)) * v[0]);
746 for (
int mu : {0, 1, 2}) {
747 stress_vloc_(mu, mu) -= sdiag;
752 symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_vloc_);
758Stress::calc_stress_nonloc()
760 if (ctx_.cfg().parameters().precision_wf() ==
"fp32") {
761#if defined(SIRIUS_USE_FP32)
763 calc_stress_nonloc_aux<float, float>();
765 calc_stress_nonloc_aux<float, std::complex<float>>();
768 RTE_THROW(
"Not compiled with FP32 support");
772 calc_stress_nonloc_aux<double, double>();
774 calc_stress_nonloc_aux<double, std::complex<double>>();
778 return stress_nonloc_;
Data and methods specific to the actual atom in the unit cell.
Atom_type const & type() const
Return const reference to corresponding atom type object.
Augmentation charge operator Q(r) of the ultrasoft pseudopotential formalism.
void generate_pw_coeffs_gvec_deriv(int nu__)
Generate G-vector derivative Q_{xi,xi'}(G)/dG of the plane-wave coefficients */.
sddk::mdarray< double, 3 > density_matrix_aux(Atom_type const &atom_type__) const
Return density matrix for all atoms of a given type in auxiliary form.
auto const & rho() const
Return const reference to charge density (scalar functions).
auto gvec_phase_factor(r3::vector< int > G__, int ia__) const
Phase factors .
auto const & gvec() const
Return const reference to Gvec object.
auto processing_unit_memory_t() const
Return the memory type for processing unit.
auto gvec_fft_sptr() const
Return shared pointer to Gvec_fft object.
std::ostream & out() const
Return output stream.
double ewald_lambda() const
Find the lambda parameter used in the Ewald summation.
mpi::Communicator const & comm() const
Total communicator of the simulation.
int num_spins() const
Number of spin components.
bool gamma_point(bool gamma_point__)
Set flag for Gamma-point calculation.
int num_mag_dims() const
Number of dimensions in the magnetization vector.
Representation of a smooth (Fourier-transformable) periodic function.
void zero()
Zero the values on the regular real-space grid and plane-wave coefficients.
r3::matrix< double > calc_stress_xc()
XC contribution to stress.
r3::matrix< double > calc_stress_ewald()
Ewald energy contribution to stress.
r3::matrix< double > calc_stress_us()
Contribution to the stress tensor from the augmentation operator.
r3::matrix< double > calc_stress_vloc()
Local potential contribution to stress.
r3::matrix< double > calc_stress_har()
Hartree energy contribution to stress.
void calc_stress_kin_aux()
Kinetic energy contribution to stress.
void calc_stress_nonloc_aux()
Non-local contribution to stress.
r3::matrix< double > calc_stress_core()
Non-linear core correction to stress tensor.
void gemm(char transa, char transb, ftn_int m, ftn_int n, ftn_int k, T const *alpha, T const *A, ftn_int lda, T const *B, ftn_int ldb, T const *beta, T *C, ftn_int ldc, acc::stream_id sid=acc::stream_id(-1)) const
General matrix-matrix multiplication.
MPI communicator wrapper.
void allreduce(T *buffer__, int count__) const
Perform the in-place (the output buffer is used as the input buffer) all-to-all reduction.
int rank() const
Rank of MPI process inside communicator.
Multidimensional array with the column-major (Fortran) order.
void zero(memory_t mem__, size_t idx0__, size_t n__)
Zero n elements starting from idx0.
uint32_t ld() const
Return leading dimension size.
Contains definition of sirius::K_point class.
bool is_device_memory(memory_t mem__)
Check if this is a valid device memory (memory, accessible by the device).
memory_t
Memory types where the code can store data.
lib_t
Type of linear algebra backend library.
@ spla
SPLA library. Can take CPU and device pointers.
Namespace of the SIRIUS library.
Smooth_periodic_vector_function< T > gradient(Smooth_periodic_function< T > &f__)
Gradient of the function in the plane-wave domain.
double energy_vxc(Density const &density, Potential const &potential)
Returns exchange correlation potential.
double energy_bxc(const Density &density, const Potential &potential)
TODO doc.
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
double energy_exc(Density const &density, Potential const &potential)
Returns exchange correlation energy.
Common operation for forces and stress tensor.
Simple classes and functions to work with vectors and matrices of the R^3 space.
Contains definition of sirius::Stress tensor class.
Symmetrize lattice stress tensor.