d4/d58/stress_8cpp_source.html

// Copyright (c) 2013-2019 Anton Kozhevnikov, Thomas Schulthess

// All rights reserved.

//

// Redistribution and use in source and binary forms, with or without modification, are permitted provided that

// the following conditions are met:

//

// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the

//    following disclaimer.

// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions

//    and the following disclaimer in the documentation and/or other materials provided with the distribution.

//

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED

// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A

// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR

// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER

// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR

// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


/** \file stress.cpp

 *

 *  \brief Contains implementation of sirius::Stress class.

 */


#include "core/r3/r3.hpp"

#include "core/profiler.hpp"

#include "k_point/k_point.hpp"

#include "non_local_functor.hpp"

#include "dft/energy.hpp"

#include "symmetry/symmetrize_stress_tensor.hpp"

#include "stress.hpp"


namespace sirius {


template <typename T, typename F>

void

Stress::calc_stress_nonloc_aux()

{

    PROFILE("sirius::Stress|nonloc");


    sddk::mdarray<real_type<F>, 2> collect_result(9, ctx_.unit_cell().num_atoms());

    collect_result.zero();


    stress_nonloc_.zero();


    /* if there are no beta projectors then get out there */

    if (ctx_.unit_cell().max_mt_basis_size() == 0) {

        return;

    }


    print_memory_usage(ctx_.out(), FILE_LINE);


    for (auto it : kset_.spl_num_kpoints()) {

        auto kp = kset_.get<T>(it.i);

        auto mem = ctx_.processing_unit_memory_t();

        auto mg = kp->spinor_wave_functions().memory_guard(mem, wf::copy_to::device);

        Beta_projectors_strain_deriv<T> bp_strain_deriv(ctx_, kp->gkvec());


        add_k_point_contribution_nonlocal<T, F>(ctx_, bp_strain_deriv, *kp, collect_result);

    }


    #pragma omp parallel

    {

        r3::matrix<double> tmp_stress; // TODO: test pragma omp parallel for reduction(+:stress)


        #pragma omp for

        for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {

            for (int i = 0; i < 3; i++) {

                for (int j = 0; j < 3; j++) {

                    tmp_stress(i, j) -= collect_result(j * 3 + i, ia);

                }

            }

        }


        #pragma omp critical

        stress_nonloc_ += tmp_stress;

    }


    ctx_.comm().allreduce(&stress_nonloc_(0, 0), 9);


    stress_nonloc_ *= (1.0 / ctx_.unit_cell().omega());


    symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_nonloc_);

}


/** Recompute every contribution to the stress tensor and return their sum.
 *
 *  Each calc_stress_*() call refreshes the corresponding member tensor. The Hubbard term is
 *  evaluated only when ctx_.hubbard_correction() is true and is zero otherwise.
 *
 *  \return Copy of stress_total_.
 */
r3::matrix<double>
Stress::calc_stress_total()
{
    calc_stress_kin();
    calc_stress_har();
    calc_stress_ewald();
    calc_stress_vloc();
    calc_stress_core();
    calc_stress_xc();
    calc_stress_us();
    calc_stress_nonloc();

    /* optional term: stays zero when the Hubbard correction is switched off */
    stress_hubbard_.zero();
    if (ctx_.hubbard_correction()) {
        calc_stress_hubbard();
    }

    stress_total_.zero();

    /* element-wise sum of all the parts */
    for (int idx = 0; idx < 9; idx++) {
        int const mu = idx / 3;
        int const nu = idx % 3;

        stress_total_(mu, nu) = stress_kin_(mu, nu) + stress_har_(mu, nu) + stress_ewald_(mu, nu) +
                                stress_vloc_(mu, nu) + stress_core_(mu, nu) + stress_xc_(mu, nu) +
                                stress_us_(mu, nu) + stress_nonloc_(mu, nu) + stress_hubbard_(mu, nu);
    }

    return stress_total_;
}


/** Hubbard correction contribution to the stress tensor.
 *
 *  For every k-point of this rank the strain derivatives of the occupation matrix are obtained from
 *  potential_.U().compute_occupancies_stress_derivatives() and contracted with the local and the
 *  non-local parts of the Hubbard potential. The result is reduced over kset_.comm(), symmetrized
 *  and stored (with the final sign flip) in stress_hubbard_.
 *
 *  Raises a runtime error (RTE_THROW) if the pseudopotentials have no beta projectors or if
 *  ctx_.num_mag_dims() == 3.
 *
 *  \return Copy of stress_hubbard_.
 */
r3::matrix<double>
Stress::calc_stress_hubbard()
{
    stress_hubbard_.zero();

    /* pseudopotentials without beta projectors are not supported here */
    /* TODO : Need to fix the case where pp have no beta projectors */
    if (ctx_.unit_cell().max_mt_basis_size() == 0) {
        RTE_THROW("Hubbard forces : Your pseudo potentials do not have beta projectors. This need a proper fix");
        return stress_hubbard_;
    }

    /* this check does not depend on the k-point: do it once, on every rank, before any wave-functions
       are moved to the device */
    if (ctx_.num_mag_dims() == 3) {
        RTE_THROW("Hubbard stress correction is only implemented for the simple hubbard correction.");
    }

    Q_operator<double> q_op(ctx_);

    auto nhwf = ctx_.unit_cell().num_hubbard_wf().first;

    auto const mem       = ctx_.processing_unit_memory_t();
    bool const on_device = is_device_memory(mem);
    auto const omega     = ctx_.unit_cell().omega();

    /* derivative of the occupation matrix; the last index is the flattened strain component
       dir1 + 3 * dir2 */
    sddk::mdarray<std::complex<double>, 4> dn(nhwf, nhwf, 2, 9);
    if (on_device) {
        dn.allocate(mem);
    }

    for (auto it : kset_.spl_num_kpoints()) {
        auto kp = kset_.get<double>(it.i);
        dn.zero();
        if (on_device) {
            dn.zero(mem);
        }
        // kp->beta_projectors().prepare();
        auto mg1 = kp->spinor_wave_functions().memory_guard(mem, wf::copy_to::device);
        auto mg2 = kp->hubbard_wave_functions_S().memory_guard(mem, wf::copy_to::device);
        auto mg3 = kp->atomic_wave_functions().memory_guard(mem, wf::copy_to::device);
        auto mg4 = kp->atomic_wave_functions_S().memory_guard(mem, wf::copy_to::device);

        /* compute the derivative of the occupancies numbers */
        potential_.U().compute_occupancies_stress_derivatives(*kp, q_op, dn);

        for (int dir1 = 0; dir1 < 3; dir1++) {
            for (int dir2 = 0; dir2 < 3; dir2++) {
                /* local (on-site) part of the Hubbard potential */
                for (int at_lvl = 0; at_lvl < static_cast<int>(potential_.hubbard_potential().local().size());
                     at_lvl++) {
                    const int ia1    = potential_.hubbard_potential().atomic_orbitals(at_lvl).first;
                    const auto& atom = ctx_.unit_cell().atom(ia1);
                    const int lo     = potential_.hubbard_potential().atomic_orbitals(at_lvl).second;
                    if (atom.type().lo_descriptor_hub(lo).use_for_calculation()) {
                        const int lmax_at = 2 * atom.type().lo_descriptor_hub(lo).l() + 1;
                        const int offset  = potential_.hubbard_potential().offset(at_lvl);
                        for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                            for (int m1 = 0; m1 < lmax_at; m1++) {
                                for (int m2 = 0; m2 < lmax_at; m2++) {
                                    stress_hubbard_(dir1, dir2) -=
                                        (potential_.hubbard_potential().local(at_lvl)(m2, m1, ispn) *
                                         dn(offset + m1, offset + m2, ispn, dir1 + 3 * dir2))
                                            .real() /
                                        omega;
                                }
                            }
                        }
                    }
                }

                /* non-local (inter-site) part of the Hubbard potential */
                double d = 0;
                for (int i = 0; i < static_cast<int>(ctx_.cfg().hubbard().nonlocal().size()); i++) {
                    auto nl = ctx_.cfg().hubbard().nonlocal(i);
                    int ia  = nl.atom_pair()[0];
                    int ja  = nl.atom_pair()[1];
                    int il  = nl.l()[0];
                    int jl  = nl.l()[1];
                    int in  = nl.n()[0];
                    int jn  = nl.n()[1];
                    auto Tr = nl.T();

                    /* phase exp(-2 pi i T.k) built from the vector T of the pair and the k-point */
                    auto z1           = std::exp(std::complex<double>(0, -twopi * dot(r3::vector<int>(Tr), kp->vk())));
                    const int at_lvl1 = potential_.hubbard_potential().find_orbital_index(ia, in, il);
                    const int at_lvl2 = potential_.hubbard_potential().find_orbital_index(ja, jn, jl);
                    const int offset1 = potential_.hubbard_potential().offset(at_lvl1);
                    const int offset2 = potential_.hubbard_potential().offset(at_lvl2);

                    for (int is = 0; is < ctx_.num_spins(); is++) {
                        for (int m2 = 0; m2 < 2 * jl + 1; m2++) {
                            for (int m1 = 0; m1 < 2 * il + 1; m1++) {
                                auto result1_ = z1 * std::conj(dn(offset2 + m2, offset1 + m1, is, dir1 + 3 * dir2)) *
                                                potential_.hubbard_potential().nonlocal(i)(m1, m2, is);
                                d += std::real(result1_);
                            }
                        }
                    }
                }
                stress_hubbard_(dir1, dir2) -= d / omega;
            }
        }
    }

    /* global reduction */
    kset_.comm().allreduce(&stress_hubbard_(0, 0), 9);
    symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_hubbard_);

    stress_hubbard_ = -1.0 * stress_hubbard_;

    return stress_hubbard_;
}


/** Non-linear core correction to stress tensor.
 *
 *  The term is skipped (zero tensor returned) when no atom has a pseudo-core charge density.
 *  Otherwise the plane-wave coefficients of the XC potential are combined with the
 *  periodic function built from ctx_.ri().ps_core_djl_ (off-diagonal and diagonal G-dependent part) and with
 *  the pseudo-core density itself (diagonal part).
 *
 *  \return Copy of stress_core_.
 */
r3::matrix<double>
Stress::calc_stress_core()
{
    stress_core_.zero();

    /* look for the first atom whose type carries a pseudo-core charge density */
    bool has_core_density{false};
    for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
        if (!ctx_.unit_cell().atom(ia).type().ps_core_charge_density().empty()) {
            has_core_density = true;
            break;
        }
    }
    /* no core density anywhere: the correction is simply ignored */
    if (!has_core_density) {
        return stress_core_;
    }

    /* plane-wave coefficients of the XC potential are needed below */
    potential_.xc_potential().rg().fft_transform(-1);

    auto q        = ctx_.gvec().shells_len();
    auto const ri = ctx_.ri().ps_core_djl_->values(q, ctx_.comm());
    auto drhoc    = make_periodic_function<index_domain_t::local>(ctx_.unit_cell(), ctx_.gvec(),
                                                                  ctx_.phase_factors_t(), ri);

    /* small accessors for the two sets of local plane-wave coefficients */
    auto vxc_conj = [this](int ig__) { return std::conj(potential_.xc_potential().rg().f_pw_local(ig__)); };
    auto core_pw  = [this](int ig__) { return density_.rho_pseudo_core().f_pw_local(ig__); };

    double sdiag{0};

    /* G = 0 is skipped in the loop (division by |G|) */
    int const first_ig = ctx_.gvec().skip_g0();
    for (int igloc = first_ig; igloc < ctx_.gvec().count(); igloc++) {
        auto G = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);
        auto g = G.length();

        auto const vc = vxc_conj(igloc);
        auto const w  = std::real(vc * drhoc[igloc]);

        for (int mu = 0; mu < 3; mu++) {
            for (int nu = 0; nu < 3; nu++) {
                stress_core_(mu, nu) -= w * G[mu] * G[nu] / g;
            }
        }

        sdiag += std::real(vc * core_pw(igloc));
    }

    /* a reduced G-vector list stores only half of the vectors */
    if (ctx_.gvec().reduced()) {
        stress_core_ *= 2;
        sdiag *= 2;
    }
    /* the local index 0 term is added once, on rank 0, and is not doubled */
    if (ctx_.comm().rank() == 0) {
        sdiag += std::real(vxc_conj(0) * core_pw(0));
    }

    for (int mu = 0; mu < 3; mu++) {
        stress_core_(mu, mu) -= sdiag;
    }

    ctx_.comm().allreduce(&stress_core_(0, 0), 9);

    symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_core_);

    return stress_core_;
}


/** XC contribution to stress.
 *
 *  The diagonal is set to (E_exc - E_vxc - E_bxc) / omega. When the potential has a gradient
 *  correction, a term built from products of density-gradient components and potential_.vsigma()
 *  is added.
 *
 *  \return Copy of stress_xc_.
 */
r3::matrix<double>
Stress::calc_stress_xc()
{
    stress_xc_.zero();

    double const e_diag = sirius::energy_exc(density_, potential_) - sirius::energy_vxc(density_, potential_) -
                          sirius::energy_bxc(density_, potential_);

    for (int l : {0, 1, 2}) {
        stress_xc_(l, l) = e_diag / ctx_.unit_cell().omega();
    }

    if (potential_.is_gradient_correction()) {

        /* accumulated over the local real-space points */
        r3::matrix<double> grad_term;

        /* factor 2 in the expression for gradient correction comes from the
           derivative of sigm (which is grad(rho) * grad(rho)) */

        if (ctx_.num_spins() == 1) {
            /* valence + pseudo-core density */
            Smooth_periodic_function<double> rho_tot(ctx_.spfft<double>(), ctx_.gvec_fft_sptr());
            rho_tot.zero();
            rho_tot += density_.rho().rg();
            rho_tot += density_.rho_pseudo_core();

            /* to the plane-wave domain */
            rho_tot.fft_transform(-1);

            /* plane-wave coefficients of the gradient ... */
            auto grad_rho = gradient(rho_tot);

            /* ... and back to real space, component by component */
            for (int x = 0; x < 3; x++) {
                grad_rho[x].fft_transform(1);
            }

            for (int ir = 0; ir < ctx_.spfft<double>().local_slice_size(); ir++) {
                for (int mu = 0; mu < 3; mu++) {
                    for (int nu = 0; nu < 3; nu++) {
                        grad_term(mu, nu) +=
                            2 * grad_rho[mu].value(ir) * grad_rho[nu].value(ir) * potential_.vsigma(0).value(ir);
                    }
                }
            }
        } else {
            auto up_dn   = get_rho_up_dn<true>(density_);
            auto& rho_up = *up_dn[0];
            auto& rho_dn = *up_dn[1];

            /* to the plane-wave domain */
            rho_up.fft_transform(-1);
            rho_dn.fft_transform(-1);

            /* plane-wave coefficients of the gradients ... */
            auto grad_up = gradient(rho_up);
            auto grad_dn = gradient(rho_dn);

            /* ... and back to real space */
            for (int x = 0; x < 3; x++) {
                grad_up[x].fft_transform(1);
                grad_dn[x].fft_transform(1);
            }

            for (int ir = 0; ir < ctx_.spfft<double>().local_slice_size(); ir++) {
                for (int mu = 0; mu < 3; mu++) {
                    for (int nu = 0; nu < 3; nu++) {
                        auto const up_mu = grad_up[mu].value(ir);
                        auto const up_nu = grad_up[nu].value(ir);
                        auto const dn_mu = grad_dn[mu].value(ir);
                        auto const dn_nu = grad_dn[nu].value(ir);

                        /* up-up, mixed and dn-dn terms weighted by vsigma(0), vsigma(1), vsigma(2) */
                        grad_term(mu, nu) += up_mu * up_nu * 2 * potential_.vsigma(0).value(ir) +
                                             (up_mu * dn_nu + dn_mu * up_nu) * potential_.vsigma(1).value(ir) +
                                             dn_mu * dn_nu * 2 * potential_.vsigma(2).value(ir);
                    }
                }
            }
        }

        /* sum over the ranks of the FFT communicator and normalise by the number of grid points */
        mpi::Communicator(ctx_.spfft<double>().communicator()).allreduce(&grad_term(0, 0), 9);
        grad_term *= (-1.0 / ctx_.fft_grid().num_points());
        stress_xc_ += grad_term;
    }

    symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_xc_);

    return stress_xc_;
}


/** Contribution to the stress tensor from the augmentation operator.
 *
 *  For every augmented atom type the G-vector derivative of the augmentation operator is generated
 *  for each Cartesian direction nu, multiplied (GEMM) with the phase-factor weighted plane-wave
 *  coefficients of the potential components scaled by -G_mu / |G|, and contracted with the auxiliary
 *  density matrix.
 *
 *  \return Copy of stress_us_ (zero if there are no beta projectors).
 */
r3::matrix<double>
Stress::calc_stress_us()
{
    PROFILE("sirius::Stress|us");

    stress_us_.zero();

    /* beta projectors exist only for pseudopotentials; nothing to do without them */
    if (ctx_.unit_cell().max_mt_basis_size() == 0) {
        return stress_us_;
    }

    potential_.fft_transform(-1);

    /* linear algebra backend and scratch memory pool are selected by the processing unit */
    la::lib_t backend{la::lib_t::none};
    sddk::memory_t q_mem{sddk::memory_t::none};
    sddk::memory_pool* pool{nullptr};

    switch (ctx_.processing_unit()) {
        case sddk::device_t::CPU: {
            pool    = &get_memory_pool(sddk::memory_t::host);
            backend = la::lib_t::blas;
            q_mem   = sddk::memory_t::host;
            break;
        }
        case sddk::device_t::GPU: {
            pool    = &get_memory_pool(sddk::memory_t::host_pinned);
            backend = la::lib_t::spla;
            q_mem   = sddk::memory_t::host;
            break;
        }
    }

    /* number of local G-vectors */
    auto const ngv = ctx_.gvec().count();

    for (int iat = 0; iat < ctx_.unit_cell().num_atom_types(); iat++) {
        auto& type = ctx_.unit_cell().atom_type(iat);
        /* only augmented types that actually have atoms contribute */
        if (!type.augment() || type.num_atoms() == 0) {
            continue;
        }

        Augmentation_operator dq(ctx_.unit_cell().atom_type(iat), ctx_.gvec(), *ctx_.ri().aug_,
                                 *ctx_.ri().aug_djl_);

        auto nbf = type.mt_basis_size();
        /* size of the packed index running over nbf * (nbf + 1) / 2 pairs of basis functions */
        auto const npacked = nbf * (nbf + 1) / 2;

        /* get auxiliary density matrix */
        auto dm = density_.density_matrix_aux(type);

        sddk::mdarray<std::complex<double>, 2> phase(type.num_atoms(), ngv, get_memory_pool(sddk::memory_t::host));

        PROFILE_START("sirius::Stress|us|phase_fac");
        #pragma omp parallel for
        for (int igloc = 0; igloc < ngv; igloc++) {
            /* global G-vector index */
            int ig = ctx_.gvec().offset() + igloc;
            for (int i = 0; i < type.num_atoms(); i++) {
                phase(i, igloc) = ctx_.gvec_phase_factor(ig, type.atom_id(i));
            }
        }
        PROFILE_STOP("sirius::Stress|us|phase_fac");

        /* real and imaginary parts are stored in consecutive columns 2 * igloc, 2 * igloc + 1 */
        sddk::mdarray<double, 2> v_re_im(type.num_atoms(), ngv * 2, *pool);
        sddk::mdarray<double, 2> gemm_out(npacked, type.num_atoms(), *pool);

        for (int nu = 0; nu < 3; nu++) {
            /* generate dQ(G)/dG */
            dq.generate_pw_coeffs_gvec_deriv(nu);

            /* over spin components, can be from 1 to 4 */
            for (int ispin = 0; ispin < ctx_.num_mag_dims() + 1; ispin++) {
                for (int mu = 0; mu < 3; mu++) {
                    PROFILE_START("sirius::Stress|us|prepare");
                    int first_ig{0};
                    /* on rank 0 the first local G-vector is zeroed by hand and excluded from the loop
                       below, which divides by |G| */
                    if (ctx_.comm().rank() == 0) {
                        for (int ia = 0; ia < type.num_atoms(); ia++) {
                            v_re_im(ia, 0) = v_re_im(ia, 1) = 0;
                        }
                        first_ig = 1;
                    }
                    #pragma omp parallel for
                    for (int igloc = first_ig; igloc < ngv; igloc++) {
                        auto gvc = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);
                        double g = gvc.length();

                        for (int ia = 0; ia < type.num_atoms(); ia++) {
                            auto z = phase(ia, igloc) * potential_.component(ispin).rg().f_pw_local(igloc) *
                                     (-gvc[mu] / g);
                            v_re_im(ia, 2 * igloc)     = z.real();
                            v_re_im(ia, 2 * igloc + 1) = z.imag();
                        }
                    }
                    PROFILE_STOP("sirius::Stress|us|prepare");

                    PROFILE_START("sirius::Stress|us|gemm");
                    la::wrap(backend).gemm('N', 'T', npacked, type.num_atoms(), 2 * ngv,
                                           &la::constant<double>::one(), dq.q_pw().at(q_mem), dq.q_pw().ld(),
                                           v_re_im.at(sddk::memory_t::host), v_re_im.ld(),
                                           &la::constant<double>::zero(), gemm_out.at(sddk::memory_t::host),
                                           gemm_out.ld());
                    PROFILE_STOP("sirius::Stress|us|gemm");

                    /* contract with the density matrix */
                    for (int ia = 0; ia < type.num_atoms(); ia++) {
                        for (int i = 0; i < npacked; i++) {
                            stress_us_(mu, nu) += gemm_out(i, ia) * dm(i, ia, ispin) * dq.sym_weight(i);
                        }
                    }
                }
            }
        }
    }

    ctx_.comm().allreduce(&stress_us_(0, 0), 9);
    /* a reduced G-vector list stores only half of the vectors */
    if (ctx_.gvec().reduced()) {
        stress_us_ *= 2;
    }

    stress_us_ *= (1.0 / ctx_.unit_cell().omega());

    symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_us_);

    return stress_us_;
}


/** Ewald energy contribution to stress.
 *
 *  Consists of a reciprocal-space sum over the local G-vectors (G = 0 skipped), a constant diagonal
 *  term and a real-space sum over the nearest neighbours of every atom. The splitting parameter is
 *  ctx_.ewald_lambda().
 *
 *  \return Copy of stress_ewald_.
 */
r3::matrix<double>
Stress::calc_stress_ewald()
{
    PROFILE("sirius::Stress|ewald");

    stress_ewald_.zero();

    double lambda = ctx_.ewald_lambda();

    auto& uc = ctx_.unit_cell();

    /* ---- reciprocal-space part ---- */
    int const first_ig = ctx_.gvec().skip_g0();
    for (int igloc = first_ig; igloc < ctx_.gvec().count(); igloc++) {
        /* global G-vector index */
        int ig = ctx_.gvec().offset() + igloc;

        auto G          = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);
        double g2       = std::pow(G.length(), 2);
        double g2lambda = g2 / 4.0 / lambda;

        /* sum of phase factors weighted by zn() of each atom */
        std::complex<double> rho(0, 0);
        for (int ia = 0; ia < uc.num_atoms(); ia++) {
            rho += ctx_.gvec_phase_factor(ig, ia) * static_cast<double>(uc.atom(ia).zn());
        }

        double a1 = twopi * std::pow(std::abs(rho) / uc.omega(), 2) * std::exp(-g2lambda) / g2;

        for (int mu = 0; mu < 3; mu++) {
            for (int nu = 0; nu < 3; nu++) {
                stress_ewald_(mu, nu) += a1 * G[mu] * G[nu] * 2 * (g2lambda + 1) / g2;
            }
            stress_ewald_(mu, mu) -= a1;
        }
    }

    /* a reduced G-vector list stores only half of the vectors */
    if (ctx_.gvec().reduced()) {
        stress_ewald_ *= 2;
    }

    ctx_.comm().allreduce(&stress_ewald_(0, 0), 9);

    /* ---- constant diagonal term ---- */
    for (int mu = 0; mu < 3; mu++) {
        stress_ewald_(mu, mu) += twopi * std::pow(uc.num_electrons() / uc.omega(), 2) / 4 / lambda;
    }

    /* ---- real-space part; the neighbour list is traversed from index 1 ---- */
    for (int ia = 0; ia < uc.num_atoms(); ia++) {
        for (int i = 1; i < uc.num_nearest_neighbours(ia); i++) {
            auto const& nn = uc.nearest_neighbour(i, ia);

            auto ja = nn.atom_id;
            auto d  = nn.distance;
            auto rc = nn.rc;

            double a1 = (0.5 * uc.atom(ia).zn() * uc.atom(ja).zn() / uc.omega() / std::pow(d, 3)) *
                        (-2 * std::exp(-lambda * std::pow(d, 2)) * std::sqrt(lambda / pi) * d -
                         std::erfc(std::sqrt(lambda) * d));

            for (int mu = 0; mu < 3; mu++) {
                for (int nu = 0; nu < 3; nu++) {
                    stress_ewald_(mu, nu) += a1 * rc[mu] * rc[nu];
                }
            }
        }
    }

    symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_ewald_);

    return stress_ewald_;
}


void

Stress::print_info(std::ostream& out__, int verbosity__) const

{

    auto print_stress = [&](std::string label__, r3::matrix<double> const& s) {

        out__ << "=== " << label__ << " ===" << std::endl;

        for (int mu : {0, 1, 2}) {

           out__ << ffmt(12, 6) << s(mu, 0)

                 << ffmt(12, 6) << s(mu, 1)

                 << ffmt(12, 6) << s(mu, 2) << std::endl;

        }

    };


    const double au2kbar = 2.94210119E5;

    auto stress_kin      = stress_kin_ * au2kbar;

    auto stress_har      = stress_har_ * au2kbar;

    auto stress_ewald    = stress_ewald_ * au2kbar;

    auto stress_vloc     = stress_vloc_ * au2kbar;

    auto stress_xc       = stress_xc_ * au2kbar;

    auto stress_nonloc   = stress_nonloc_ * au2kbar;

    auto stress_us       = stress_us_ * au2kbar;

    auto stress_hubbard  = stress_hubbard_ * au2kbar;

    auto stress_core     = stress_core_ * au2kbar;


    out__ << "=== stress tensor components [kbar] ===" << std::endl;


    print_stress("stress_kin", stress_kin);


    print_stress("stress_har", stress_har);


    print_stress("stress_ewald", stress_ewald);


    print_stress("stress_vloc", stress_vloc);


    print_stress("stress_xc", stress_xc);


    print_stress("stress_core", stress_core);


    print_stress("stress_nonloc", stress_nonloc);


    print_stress("stress_us", stress_us);


    stress_us = stress_us + stress_nonloc;

    print_stress("stress_us_nl", stress_us);


    if (ctx_.hubbard_correction()) {

        print_stress("stress_hubbard", stress_hubbard);

    }


    auto stress_total = stress_total_ * au2kbar;

    print_stress("stress_total", stress_total);

}


/** Hartree energy contribution to stress.
 *
 *  Sum over the local G-vectors (G = 0 skipped) of 2 pi |rho(G)|^2 / G^2 weighted by
 *  2 G_mu G_nu / G^2 minus the unit matrix.
 *
 *  \return Copy of stress_har_.
 */
r3::matrix<double>
Stress::calc_stress_har()
{
    PROFILE("sirius::Stress|har");

    stress_har_.zero();

    int const first_ig = ctx_.gvec().skip_g0();
    for (int igloc = first_ig; igloc < ctx_.gvec().count(); igloc++) {
        auto G    = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);
        double g2 = std::pow(G.length(), 2);

        /* plane-wave coefficient of the charge density */
        auto rho_g = density_.rho().rg().f_pw_local(igloc);
        double d   = twopi * (std::pow(rho_g.real(), 2) + std::pow(rho_g.imag(), 2)) / g2;

        for (int mu = 0; mu < 3; mu++) {
            for (int nu = 0; nu < 3; nu++) {
                stress_har_(mu, nu) += d * 2 * G[mu] * G[nu] / g2;
            }
            /* diagonal part */
            stress_har_(mu, mu) -= d;
        }
    }

    /* a reduced G-vector list stores only half of the vectors */
    if (ctx_.gvec().reduced()) {
        stress_har_ *= 2;
    }

    ctx_.comm().allreduce(&stress_har_(0, 0), 9);

    symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_har_);

    return stress_har_;
}


template <typename T>

void

Stress::calc_stress_kin_aux()

{

    stress_kin_.zero();


    for (auto it : kset_.spl_num_kpoints()) {

        auto kp = kset_.get<T>(it.i);


        double fact = kp->gkvec().reduced() ? 2.0 : 1.0;

        fact *=  kp->weight();


        #pragma omp parallel

        {

            r3::matrix<double> tmp;

            for (int ispin = 0; ispin < ctx_.num_spins(); ispin++) {

                #pragma omp for

                for (int i = 0; i < kp->num_occupied_bands(ispin); i++) {

                    for (int igloc = 0; igloc < kp->num_gkvec_loc(); igloc++) {

                        auto Gk = kp->gkvec().template gkvec_cart<index_domain_t::local>(igloc);


                        double f = kp->band_occupancy(i, ispin);

                        auto z = kp->spinor_wave_functions().pw_coeffs(igloc, wf::spin_index(ispin), wf::band_index(i));

                        double d = fact * f * (std::pow(z.real(), 2) + std::pow(z.imag(), 2));

                        for (int mu : {0, 1, 2}) {

                            for (int nu : {0, 1, 2}) {

                                tmp(mu, nu) += Gk[mu] * Gk[nu] * d;

                            }

                        }

                    }

                }

            }

            #pragma omp critical

            stress_kin_ += tmp;

        }

    } // ikloc


    ctx_.comm().allreduce(&stress_kin_(0, 0), 9);


    stress_kin_ *= (-1.0 / ctx_.unit_cell().omega());


    symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_kin_);

}


/** Kinetic energy contribution to stress.
 *
 *  Dispatches to calc_stress_kin_aux<float>() or calc_stress_kin_aux<double>() according to the
 *  "precision_wf" parameter.
 *
 *  Raises a runtime error (RTE_THROW) if single precision is requested but the code was built
 *  without SIRIUS_USE_FP32; this matches the behaviour of calc_stress_nonloc().
 *
 *  \return Copy of stress_kin_.
 */
r3::matrix<double>
Stress::calc_stress_kin()
{
    PROFILE("sirius::Stress|kin");
    if (ctx_.cfg().parameters().precision_wf() == "fp32") {
#if defined(SIRIUS_USE_FP32)
        this->calc_stress_kin_aux<float>();
#else
        /* do not silently return a stale tensor when the fp32 path is not available */
        RTE_THROW("Not compiled with FP32 support");
#endif
    } else {
        this->calc_stress_kin_aux<double>();
    }
    return stress_kin_;
}


/** Local potential contribution to stress.
 *
 *  Two periodic functions are built from ctx_.ri().vloc_ and ctx_.ri().vloc_djl_. The second one,
 *  contracted with the density and G_mu G_nu, gives the G-dependent part; the first one, contracted
 *  with the density, gives the diagonal part.
 *
 *  \return Copy of stress_vloc_.
 */
r3::matrix<double>
Stress::calc_stress_vloc()
{
    PROFILE("sirius::Stress|vloc");

    stress_vloc_.zero();

    auto q           = ctx_.gvec().shells_len();
    auto const ri    = ctx_.ri().vloc_->values(q, ctx_.comm());
    auto const ri_dg = ctx_.ri().vloc_djl_->values(q, ctx_.comm());

    auto v  = make_periodic_function<index_domain_t::local>(ctx_.unit_cell(), ctx_.gvec(),
                                                            ctx_.phase_factors_t(), ri);
    auto dv = make_periodic_function<index_domain_t::local>(ctx_.unit_cell(), ctx_.gvec(),
                                                            ctx_.phase_factors_t(), ri_dg);

    /* complex conjugate of the local plane-wave coefficient of the charge density */
    auto rho_conj = [this](int ig__) { return std::conj(density_.rho().rg().f_pw_local(ig__)); };

    double sdiag{0};

    int const first_ig = ctx_.gvec().skip_g0();
    for (int igloc = first_ig; igloc < ctx_.gvec().count(); igloc++) {

        auto G = ctx_.gvec().gvec_cart<index_domain_t::local>(igloc);

        auto const rc = rho_conj(igloc);
        auto const w  = std::real(rc * dv[igloc]);

        for (int mu = 0; mu < 3; mu++) {
            for (int nu = 0; nu < 3; nu++) {
                stress_vloc_(mu, nu) += w * G[mu] * G[nu];
            }
        }

        sdiag += std::real(rc * v[igloc]);
    }

    /* a reduced G-vector list stores only half of the vectors */
    if (ctx_.gvec().reduced()) {
        stress_vloc_ *= 2;
        sdiag *= 2;
    }
    /* the local index 0 term is added once, on rank 0, and is not doubled */
    if (ctx_.comm().rank() == 0) {
        sdiag += std::real(rho_conj(0) * v[0]);
    }

    for (int mu = 0; mu < 3; mu++) {
        stress_vloc_(mu, mu) -= sdiag;
    }

    ctx_.comm().allreduce(&stress_vloc_(0, 0), 9);

    symmetrize_stress_tensor(ctx_.unit_cell().symmetry(), stress_vloc_);

    return stress_vloc_;
}


/** Non-local contribution to stress.
 *
 *  Selects the instantiation of calc_stress_nonloc_aux<T, F>() from the "precision_wf" parameter
 *  (float or double) and from ctx_.gamma_point() (real or complex second template argument).
 *
 *  Raises a runtime error (RTE_THROW) if single precision is requested but the code was built
 *  without SIRIUS_USE_FP32.
 *
 *  \return Copy of stress_nonloc_.
 */
r3::matrix<double>
Stress::calc_stress_nonloc()
{
    /* double precision path */
    if (!(ctx_.cfg().parameters().precision_wf() == "fp32")) {
        if (ctx_.gamma_point()) {
            calc_stress_nonloc_aux<double, double>();
        } else {
            calc_stress_nonloc_aux<double, std::complex<double>>();
        }
        return stress_nonloc_;
    }

    /* single precision path */
#if defined(SIRIUS_USE_FP32)
    if (ctx_.gamma_point()) {
        calc_stress_nonloc_aux<float, float>();
    } else {
        calc_stress_nonloc_aux<float, std::complex<float>>();
    }
#else
    RTE_THROW("Not compiled with FP32 support");
#endif

    return stress_nonloc_;
}


} // namespace sirius

sirius::Atom
Data and methods specific to the actual atom in the unit cell.
Definition: atom.hpp:37

sirius::Atom::type
Atom_type const & type() const
Return const reference to corresponding atom type object.
Definition: atom.hpp:318

sirius::Augmentation_operator
Augmentation charge operator Q(r) of the ultrasoft pseudopotential formalism.
Definition: augmentation_operator.hpp:111

sirius::Augmentation_operator::generate_pw_coeffs_gvec_deriv
void generate_pw_coeffs_gvec_deriv(int nu__)
Generate the G-vector derivative dQ_{xi,xi'}(G)/dG of the plane-wave coefficients.
Definition: augmentation_operator.cpp:132

sirius::Beta_projectors_strain_deriv
Definition: beta_projectors_strain_deriv.hpp:34

sirius::Density::density_matrix_aux
sddk::mdarray< double, 3 > density_matrix_aux(Atom_type const &atom_type__) const
Return density matrix for all atoms of a given type in auxiliary form.
Definition: density.cpp:1810

sirius::Density::rho
auto const & rho() const
Return const reference to charge density (scalar functions).
Definition: density.hpp:416

sirius::Simulation_context::gvec_phase_factor
auto gvec_phase_factor(r3::vector< int > G__, int ia__) const
Phase factors $e^{i \mathbf{G} \cdot \mathbf{r}_{\alpha}}$.
Definition: simulation_context.hpp:552

sirius::Simulation_context::gvec
auto const & gvec() const
Return const reference to Gvec object.
Definition: simulation_context.hpp:404

sirius::Simulation_context::processing_unit_memory_t
auto processing_unit_memory_t() const
Return the memory type for processing unit.
Definition: simulation_context.hpp:622

sirius::Simulation_context::gvec_fft_sptr
auto gvec_fft_sptr() const
Return shared pointer to Gvec_fft object.
Definition: simulation_context.hpp:422

sirius::Simulation_context::out
std::ostream & out() const
Return output stream.
Definition: simulation_context.hpp:735

sirius::Simulation_context::ewald_lambda
double ewald_lambda() const
Find the lambda parameter used in the Ewald summation.
Definition: simulation_context.cpp:127

sirius::Simulation_context::comm
mpi::Communicator const & comm() const
Total communicator of the simulation.
Definition: simulation_context.hpp:453

sirius::Simulation_parameters::num_spins
int num_spins() const
Number of spin components.
Definition: simulation_parameters.hpp:266

sirius::Simulation_parameters::gamma_point
bool gamma_point(bool gamma_point__)
Set flag for Gamma-point calculation.
Definition: simulation_parameters.hpp:139

sirius::Simulation_parameters::num_mag_dims
int num_mag_dims() const
Number of dimensions in the magnetization vector.
Definition: simulation_parameters.hpp:256

sirius::Smooth_periodic_function
Representation of a smooth (Fourier-transformable) periodic function.
Definition: smooth_periodic_function.hpp:82

sirius::Smooth_periodic_function::zero
void zero()
Zero the values on the regular real-space grid and plane-wave coefficients.
Definition: smooth_periodic_function.hpp:167

sirius::Stress::calc_stress_xc
r3::matrix< double > calc_stress_xc()
XC contribution to stress.
Definition: stress.cpp:284

sirius::Stress::calc_stress_ewald
r3::matrix< double > calc_stress_ewald()
Ewald energy contribution to stress.
Definition: stress.cpp:490

sirius::Stress::calc_stress_us
r3::matrix< double > calc_stress_us()
Contribution to the stress tensor from the augmentation operator.
Definition: stress.cpp:371

sirius::Stress::calc_stress_vloc
r3::matrix< double > calc_stress_vloc()
Local potential contribution to stress.
Definition: stress.cpp:706

sirius::Stress::calc_stress_har
r3::matrix< double > calc_stress_har()
Hartree energy contribution to stress.
Definition: stress.cpp:613

sirius::Stress::calc_stress_kin_aux
void calc_stress_kin_aux()
Kinetic energy contribution to stress.
Definition: stress.cpp:649

sirius::Stress::calc_stress_nonloc_aux
void calc_stress_nonloc_aux()
Non-local contribution to stress.
Definition: stress.cpp:37

sirius::Stress::calc_stress_core
r3::matrix< double > calc_stress_core()
Non-linear core correction to stress tensor.
Definition: stress.cpp:217

sirius::la::wrap
Definition: linalg.hpp:62

sirius::la::wrap::gemm
void gemm(char transa, char transb, ftn_int m, ftn_int n, ftn_int k, T const *alpha, T const *A, ftn_int lda, T const *B, ftn_int ldb, T const *beta, T *C, ftn_int ldc, acc::stream_id sid=acc::stream_id(-1)) const
General matrix-matrix multiplication.

sirius::mpi::Communicator
MPI communicator wrapper.
Definition: communicator.hpp:241

sirius::mpi::Communicator::allreduce
void allreduce(T *buffer__, int count__) const
Perform the in-place (the output buffer is used as the input buffer) all-to-all reduction.
Definition: communicator.hpp:470

sirius::mpi::Communicator::rank
int rank() const
Rank of MPI process inside communicator.
Definition: communicator.hpp:393

sirius::r3::matrix< double >

sirius::sddk::mdarray
Multidimensional array with the column-major (Fortran) order.
Definition: memory.hpp:660

sirius::sddk::mdarray::zero
void zero(memory_t mem__, size_t idx0__, size_t n__)
Zero n elements starting from idx0.
Definition: memory.hpp:1316

sirius::sddk::mdarray::ld
uint32_t ld() const
Return leading dimension size.
Definition: memory.hpp:1233

sirius::sddk::memory_pool
Definition: memory.hpp:356

sirius::strong_type< int, struct __spin_index_tag >

energy.hpp
Total energy terms.

k_point.hpp
Contains definition of sirius::K_point class.

sirius::sddk::is_device_memory
bool is_device_memory(memory_t mem__)
Check if this is a valid device memory (memory, accessible by the device).
Definition: memory.hpp:93

sirius::sddk::memory_t
memory_t
Memory types where the code can store data.
Definition: memory.hpp:71

sirius::la::lib_t
lib_t
Type of linear algebra backend library.
Definition: linalg_base.hpp:70

sirius::la::lib_t::none
@ none
None.

sirius::la::lib_t::spla
@ spla
SPLA library. Can take CPU and device pointers.

sirius::la::lib_t::blas
@ blas
CPU BLAS.

sirius
Namespace of the SIRIUS library.
Definition: sirius.f90:5

sirius::gradient
Smooth_periodic_vector_function< T > gradient(Smooth_periodic_function< T > &f__)
Gradient of the function in the plane-wave domain.
Definition: smooth_periodic_function.hpp:426

sirius::index_domain_t::local
@ local
Local index.

sirius::energy_vxc
double energy_vxc(Density const &density, Potential const &potential)
Returns the energy contribution of the exchange-correlation potential.
Definition: energy.cpp:76

sirius::twopi
const double twopi
Definition: constants.hpp:45

sirius::pi
const double pi
Definition: constants.hpp:42

sirius::energy_bxc
double energy_bxc(const Density &density, const Potential &potential)
TODO doc.
Definition: energy.cpp:94

sirius::conj
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
Definition: math_tools.hpp:165

sirius::energy_exc
double energy_exc(Density const &density, Potential const &potential)
Returns the exchange-correlation energy.
Definition: energy.cpp:82

non_local_functor.hpp
Common operation for forces and stress tensor.

profiler.hpp
A time-based profiler.

r3.hpp
Simple classes and functions to work with vectors and matrices of the R^3 space.

stress.hpp
Contains definition of sirius::Stress tensor class.

sirius::la::constant
Definition: linalg_base.hpp:42

symmetrize_stress_tensor.hpp
Symmetrize lattice stress tensor.