d8/dcd/poisson_8cpp_source.html

// Copyright (c) 2013-2017 Anton Kozhevnikov, Thomas Schulthess

// All rights reserved.

//

// Redistribution and use in source and binary forms, with or without modification, are permitted provided that

// the following conditions are met:

//

// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the

//    following disclaimer.

// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions

//    and the following disclaimer in the documentation and/or other materials provided with the distribution.

//

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED

// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A

// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR

// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER

// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR

// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


/** \file poisson.cpp

 *

 *  \brief Implementation of the Poisson solver for the full-potential muffin-tin case.

 */


#include "potential.hpp"

#include "core/profiler.hpp"

#include "lapw/sum_fg_fl_yg.hpp"


namespace sirius {


/* Hartree-like energy of the difference between two densities, evaluated in reciprocal space.
 *
 * For component 0 of both densities the local plane-wave coefficients are subtracted and
 * |drho(G)|^2 / |G|^2 is accumulated over the local G-vectors, starting at gvec().skip_g0()
 * (presumably this skips the G=0 term, for which 1/|G|^2 is singular -- confirm against Gvec).
 * The partial sums are reduced over the G-vector communicator, scaled by 2*pi*omega (omega being
 * the unit cell volume of rho1__) and doubled when the G-vector set reports itself as reduced.
 *
 * rho1__, rho2__ : densities to compare; the context and G-vector set are taken from rho1__.
 * Returns the accumulated energy.
 */
double density_residual_hartree_energy(Density const& rho1__, Density const& rho2__)
{
    auto const& ctx  = rho1__.ctx();
    auto const& gvec = ctx.gvec();

    int const ig_first = gvec.skip_g0();
    int const ig_end   = gvec.count();

    double e_res{0};
    #pragma omp parallel for reduction(+:e_res)
    for (int ig = ig_first; ig < ig_end; ig++) {
        /* difference of the plane-wave coefficients for this G-vector */
        auto const drho = rho1__.component(0).rg().f_pw_local(ig) - rho2__.component(0).rg().f_pw_local(ig);
        double const glen = gvec.gvec_len<index_domain_t::local>(ig);
        /* |drho|^2 */
        double const drho_sq = std::pow(drho.real(), 2) + std::pow(drho.imag(), 2);
        e_res += drho_sq / std::pow(glen, 2);
    }

    /* sum the contributions of all ranks holding a slice of the G-vectors */
    gvec.comm().allreduce(&e_res, 1);

    e_res *= twopi * ctx.unit_cell().omega();

    /* a reduced G-vector set stores only half of the vectors, hence the factor of two */
    return gvec.reduced() ? e_res * 2 : e_res;
}


/* Add the pseudo-density contribution to the plane-wave coefficients of the charge density.
 *
 * qmt__    : multipole moments indexed as (lm, atom id); poisson() passes the true muffin-tin moments.
 * qit__    : multipole moments indexed as (lm, atom id); poisson() passes the moments of the
 *            interstitial density inside the muffin-tins.
 * rho_pw__ : local plane-wave coefficients of the density; updated in place.
 *
 * The work is done atom type by atom type. For each type the moment differences qmt - qit are
 * contracted with the conjugated phase factors of the atoms (a GEMM on the host or on the device,
 * depending on the processing unit) and the result is turned into a correction of rho(G).
 */
void Potential::poisson_add_pseudo_pw(sddk::mdarray<std::complex<double>, 2>& qmt__,
                                      sddk::mdarray<std::complex<double>, 2>& qit__,
                                      std::complex<double>* rho_pw__)
{
    PROFILE("sirius::Potential::poisson_add_pseudo_pw");

    using zmatrix = sddk::mdarray<std::complex<double>, 2>;

    auto const& gv      = ctx_.gvec();
    int const lmmax_rho = ctx_.lmmax_rho();
    int const lmax_rho  = ctx_.lmax_rho();
    int const ngv_loc   = gv.count();
    auto const pu       = ctx_.processing_unit();

    /* The following term is added to the plane-wave coefficients of the charge density:
     * Integrate[SphericalBesselJ[l,a*x]*p[x,R]*x^2,{x,0,R},Assumptions->{l>=0,n>=0,R>0,a>0}] /
     *  Integrate[p[x,R]*x^(2+l),{x,0,R},Assumptions->{h>=0,n>=0,R>0}]
     * i.e. the contribution from the pseudodensity to the l-th channel of the plane-wave expansion,
     * multiplied by the difference between the true and interstitial-in-the-MT multipole moments and
     * divided by the moment of the pseudodensity.
     */
    for (int iat = 0; iat < unit_cell_.num_atom_types(); iat++) {
        auto const& type  = unit_cell_.atom_type(iat);
        double const R_mt = type.mt_radius();
        int const num_at  = type.num_atoms();

        zmatrix phase;    /* phase factors, (ngv, num_at) */
        zmatrix dq;       /* moment differences, (lmmax, num_at) */
        zmatrix dq_phase; /* dq contracted with conjugated phase factors, (lmmax, ngv) */

        if (pu == sddk::device_t::CPU) {
            auto& host_pool = get_memory_pool(sddk::memory_t::host);
            phase    = zmatrix(ngv_loc, num_at, host_pool);
            dq       = zmatrix(lmmax_rho, num_at, host_pool);
            dq_phase = zmatrix(lmmax_rho, ngv_loc, host_pool);
        } else if (pu == sddk::device_t::GPU) {
            auto& host_pool   = get_memory_pool(sddk::memory_t::host);
            auto& device_pool = get_memory_pool(sddk::memory_t::device);
            /* phase factors live on the device only */
            phase = zmatrix(nullptr, ngv_loc, num_at);
            phase.allocate(device_pool);
            /* moment differences: host and device copies */
            dq = zmatrix(lmmax_rho, num_at, host_pool);
            dq.allocate(device_pool);
            /* contracted result: host and device copies */
            dq_phase = zmatrix(lmmax_rho, ngv_loc, host_pool);
            dq_phase.allocate(device_pool);
        }

        ctx_.generate_phase_factors(iat, phase);

        /* gather qmt - qit for the atoms of this type (column i <-> i-th atom of the type) */
        for (int i = 0; i < num_at; i++) {
            int const ia = type.atom_id(i);
            for (int lm = 0; lm < lmmax_rho; lm++) {
                dq(lm, i) = qmt__(lm, ia) - qit__(lm, ia);
            }
        }

        /* dq_phase(lm, ig) = sum_i dq(lm, i) * conj(phase(ig, i)) */
        auto const* one  = &la::constant<std::complex<double>>::one();
        auto const* zero = &la::constant<std::complex<double>>::zero();
        if (pu == sddk::device_t::CPU) {
            la::wrap(la::lib_t::blas).gemm('N', 'C', lmmax_rho, ngv_loc, num_at, one,
                dq.at(sddk::memory_t::host), dq.ld(), phase.at(sddk::memory_t::host), phase.ld(),
                zero, dq_phase.at(sddk::memory_t::host), dq_phase.ld());
        } else if (pu == sddk::device_t::GPU) {
            /* input was filled on the host; the result is consumed on the host below */
            dq.copy_to(sddk::memory_t::device);
            la::wrap(la::lib_t::gpublas).gemm('N', 'C', lmmax_rho, ngv_loc, num_at, one,
                dq.at(sddk::memory_t::device), dq.ld(), phase.at(sddk::memory_t::device), phase.ld(),
                zero, dq_phase.at(sddk::memory_t::device), dq_phase.ld());
            dq_phase.copy_to(sddk::memory_t::host);
        }

        double const fourpi_omega = fourpi / unit_cell_.omega();
        int const ig_first        = gv.skip_g0();

        /* add pseudo_density to interstitial charge density so that rho(G) has the correct
         * multipole moments in the muffin-tins */
        #pragma omp parallel for schedule(static)
        for (int ig = ig_first; ig < ngv_loc; ig++) {
            double const gR  = gv.gvec_len<index_domain_t::local>(ig) * R_mt;
            double const gRn = std::pow(2.0 / gR, pseudo_density_order_ + 1);

            std::complex<double> drho(0, 0);
            /* loop over angular momentum channels; lm runs continuously over all (l, m) pairs */
            int lm{0};
            for (int l = 0; l <= lmax_rho; l++) {
                std::complex<double> ylm_dq(0, 0);
                for (int m = -l; m <= l; m++, lm++) {
                    ylm_dq += gvec_ylm_(lm, ig) * dq_phase(lm, ig);
                }
                drho += fourpi_omega * std::conj(zil_[l]) * ylm_dq * gamma_factors_R_(l, iat) *
                        sbessel_mt_(l + pseudo_density_order_ + 1, ig, iat) * gRn;
            }
            rho_pw__[ig] += drho;
        }

        /* for G=0 case: only the l=0 moments contribute.
         * NOTE(review): poisson() selects the owner of G=0 with ctx_.gvec().comm().rank(), while
         * ctx_.comm().rank() is used here -- confirm that both communicators agree. */
        if (ctx_.comm().rank() == 0) {
            std::complex<double> rho_g0(0, 0);
            for (int i = 0; i < num_at; i++) {
                int const ia = type.atom_id(i);
                rho_g0 += fourpi_omega * y00 * (qmt__(0, ia) - qit__(0, ia));
            }
            rho_pw__[0] += rho_g0;
        }
    }
}


/* Solve the Poisson equation for the density rho and store the result in hartree_potential_.
 *
 * Steps, in order:
 *   1. (full potential only) compute the muffin-tin multipole moments, the moments of the
 *      interstitial density inside the muffin-tins, and add the pseudo-charge to rho(G);
 *   2. build the plane-wave coefficients of the Hartree potential (periodic or molecular variant);
 *   3. (full potential only) add the boundary-condition term to the muffin-tin part and store the
 *      value at the first radial point in vh_el_;
 *   4. transform the potential to real space and compute energy_vha_;
 *   5. (full potential, unless __VHA_AUX is defined) add the nuclear potential and its energy.
 *
 * Note that the plane-wave coefficients of rho are modified in step 1 through a const_cast.
 */
void Potential::poisson(Periodic_function<double> const& rho)
{
    PROFILE("sirius::Potential::poisson");

    auto const& gv = ctx_.gvec();

    /* in case of full potential we need to do pseudo-charge multipoles */
    if (ctx_.full_potential()) {
        /* true multipole moments */
        auto qmt = poisson_vmt(rho);
        if (env::print_checksum()) {
            print_checksum("qmt", qmt.checksum(), ctx_.out());
        }

        /* local plane-wave coefficients of the density */
        auto const* rho_pw = &rho.rg().f_pw_local(0);

        /* compute multipoles of interstitial density in MT region */
        auto qit = sum_fg_fl_yg(ctx_, ctx_.lmax_rho(), rho_pw, sbessel_mom_, gvec_ylm_);
        if (env::print_checksum()) {
            print_checksum("qit", qit.checksum(), ctx_.out());
        }

        /* add contribution from the pseudo-charge; this writes into the coefficients of rho */
        poisson_add_pseudo_pw(qmt, qit, const_cast<std::complex<double>*>(rho_pw));

        if (ctx_.cfg().control().verification() >= 2) {
            /* recompute the interstitial moments from the corrected density and
             * accumulate their absolute deviation from the muffin-tin moments */
            auto qit_check = sum_fg_fl_yg(ctx_, ctx_.lmax_rho(), rho_pw, sbessel_mom_, gvec_ylm_);

            double err{0.0};
            for (int ia = 0; ia < unit_cell_.num_atoms(); ia++) {
                for (int lm = 0; lm < ctx_.lmmax_rho(); lm++) {
                    err += std::abs(qmt(lm, ia) - qit_check(lm, ia));
                }
            }
            if (ctx_.verbosity() >= 1) {
                RTE_OUT(ctx_.out()) << "pseudocharge error: " << err << std::endl;
            }
        }
    }

    /* compute pw coefficients of Hartree potential */
    auto& vha_rg = hartree_potential_->rg();

    /* the G=0 coefficient is set to zero on the rank that owns it */
    if (gv.comm().rank() == 0) {
        vha_rg.f_pw_local(0) = 0.0;
    }

    int const ig_first = gv.skip_g0();
    int const ngv_loc  = gv.count();

    if (ctx_.molecule()) {
        /* reference paper:

           Supercell technique for total-energy calculations of finite charged and polar systems
           M. R. Jarvis, I. D. White, R. W. Godby, and M. C. Payne
           Phys. Rev. B 56, 14972 – Published 15 December 1997
           DOI:https://doi.org/10.1103/PhysRevB.56.14972
        */
        double const R_cut = 0.5 * std::pow(unit_cell_.omega(), 1.0 / 3);
        #pragma omp parallel for
        for (int ig = ig_first; ig < ngv_loc; ig++) {
            auto const glen = gv.gvec_len<index_domain_t::local>(ig);
            vha_rg.f_pw_local(ig) = (fourpi * rho.rg().f_pw_local(ig) / std::pow(glen, 2)) *
                                    (1.0 - std::cos(glen * R_cut));
        }
    } else {
        /* periodic case: V(G) = 4 pi rho(G) / |G|^2 */
        #pragma omp parallel for
        for (int ig = ig_first; ig < ngv_loc; ig++) {
            vha_rg.f_pw_local(ig) = fourpi * rho.rg().f_pw_local(ig) /
                std::pow(gv.gvec_len<index_domain_t::local>(ig), 2);
        }
    }

    //if (ctx_.control().print_checksum_) {
    //    auto z4 = mdarray<std::complex<double>, 1>(&vh->f_pw(0), ctx_.gvec().num_gvec()).checksum();
    //    if (ctx_.comm().rank() == 0) {
    //        DUMP("checksum(vh_pw): %20.14f %20.14f", z4.real(), z4.imag());
    //    }
    //}

    /* boundary condition for muffin-tins */
    if (ctx_.full_potential()) {
        /* compute V_lm at the MT boundary */
        auto vmtlm = sum_fg_fl_yg(ctx_, ctx_.lmax_pot(), &vha_rg.f_pw_local(0), sbessel_mt_, gvec_ylm_);

        /* add boundary condition and convert to Rlm */
        PROFILE("sirius::Potential::poisson|bc");

        int const lmax_pot  = ctx_.lmax_pot();
        int const lmmax_pot = ctx_.lmmax_pot();

        /* table of (r / R)^l for every radial point, l and atom type */
        sddk::mdarray<double, 3> rRl(unit_cell_.max_num_mt_points(), lmax_pot + 1, unit_cell_.num_atom_types());
        for (int iat = 0; iat < unit_cell_.num_atom_types(); iat++) {
            auto const& type = unit_cell_.atom_type(iat);
            int const nmtp   = type.num_mt_points();
            double const R   = type.mt_radius();

            #pragma omp parallel for default(shared)
            for (int l = 0; l <= lmax_pot; l++) {
                for (int ir = 0; ir < nmtp; ir++) {
                    rRl(ir, l, iat) = std::pow(type.radial_grid(ir) / R, l);
                }
            }
        }

        /* loop over the atoms assigned to this rank */
        for (auto it : unit_cell_.spl_num_atoms()) {
            auto const ia  = it.i;
            auto& atom     = unit_cell_.atom(ia);
            int const nmtp = atom.num_mt_points();
            int const iat  = atom.type_id();

            std::vector<double> vlm(lmmax_pot);
            SHT::convert(lmax_pot, &vmtlm(0, ia), &vlm[0]);

            #pragma omp parallel for default(shared)
            for (int lm = 0; lm < lmmax_pot; lm++) {
                int const l = l_by_lm_[lm];
                for (int ir = 0; ir < nmtp; ir++) {
                    hartree_potential_->mt()[ia](lm, ir) += vlm[lm] * rRl(ir, l, iat);
                }
            }

            /* save electronic part of the potential at the point of origin */
#ifdef __VHA_AUX
            vh_el_(ia) = y00 * hartree_potential_->mt()[ia](0, 0) +
                atom.zn() / atom.radial_grid(0);
#else
            vh_el_(ia) = y00 * hartree_potential_->mt()[ia](0, 0);
#endif
        }

        /* make the values computed by each rank available on all ranks */
        ctx_.comm().allgather(vh_el_.at(sddk::memory_t::host), unit_cell_.spl_num_atoms().local_size(),
                unit_cell_.spl_num_atoms().global_offset());
    }

    /* transform Hartree potential to real space */
    vha_rg.fft_transform(1);

    if (env::print_checksum()) {
        auto cs_rg = vha_rg.checksum_rg();
        auto cs_pw = vha_rg.checksum_pw();
        print_checksum("vha_rg", cs_rg, ctx_.out());
        print_checksum("vha_pw", cs_pw, ctx_.out());
    }

    /* compute contribution from the smooth part of Hartree potential */
    energy_vha_ = sirius::inner(rho, hartree_potential());

#ifndef __VHA_AUX
    /* add nucleus potential and contribution to Hartree energy */
    if (ctx_.full_potential()) {
        double evha_nuc{0};
        for (auto it : unit_cell_.spl_num_atoms()) {
            auto const ia = it.i;
            auto& atom    = unit_cell_.atom(ia);

            /* radial function rho_00(r) * r used for the nuclear energy integral */
            Spline<double> srho(atom.radial_grid());
            for (int ir = 0; ir < atom.num_mt_points(); ir++) {
                double const r = atom.radial_grid(ir);
                hartree_potential_->mt()[ia](0, ir) -= atom.zn() / r / y00;
                srho(ir) = rho.mt()[ia](0, ir) * r;
            }
            evha_nuc -= atom.zn() * srho.interpolate().integrate(0) / y00;
        }
        /* sum the contributions of the atoms distributed over the ranks */
        ctx_.comm().allreduce(&evha_nuc, 1);
        energy_vha_ += evha_nuc;
    }
#endif

    /* check values at MT boundary */
    //if (true) {
    //    /* compute V_lm at the MT boundary */
    //    auto vmtlm = ctx_.sum_fg_fl_yg(ctx_.lmax_pot(), &hartree_potential_->f_pw_local(0), sbessel_mt_, gvec_ylm_);

    //    for (int ialoc = 0; ialoc < unit_cell_.spl_num_atoms().local_size(); ialoc++) {
    //        int ia = unit_cell_.spl_num_atoms(ialoc);
    //        int nmtp = unit_cell_.atom(ia).num_mt_points();

    //        std::vector<double> vlm(ctx_.lmmax_pot());
    //        SHT::convert(ctx_.lmax_pot(), &vmtlm(0, ia), &vlm[0]);

    //        for (int lm = 0; lm < ctx_.lmmax_pot(); lm++) {
    //            printf("ia=%i lm=%i vlmdiff=%20.16f\n", ia, lm,
    //              std::abs(hartree_potential_->f_mt<index_domain_t::local>(lm, nmtp - 1, ialoc) - vlm[lm]));
    //        }
    //    }

    //}
}


} // namespace sirius

sirius::Atom_type::atom_id
int atom_id(int idx) const
Return atom ID (global index) by the index of atom within a given type.
Definition: atom_type.hpp:940

sirius::Atom_type::num_mt_points
int num_mt_points() const
Return number of muffin-tin radial grid points.
Definition: atom_type.hpp:718

sirius::Atom_type::mt_radius
double mt_radius() const
Return muffin-tin radius.
Definition: atom_type.hpp:712

sirius::Atom_type::num_atoms
int num_atoms() const
Return number of atoms of a given type.
Definition: atom_type.hpp:934

sirius::Atom::type_id
int type_id() const
Return atom type id.
Definition: atom.hpp:336

sirius::Periodic_function
Representation of the periodical function on the muffin-tin geometry.
Definition: periodic_function.hpp:52

sirius::Periodic_function::mt
auto & mt()
Return reference to spherical functions component.
Definition: periodic_function.hpp:275

sirius::Periodic_function::rg
auto & rg()
Return reference to regular space grid component.
Definition: periodic_function.hpp:263

sirius::Potential::poisson
void poisson(Periodic_function< double > const &rho)
Poisson solver.
Definition: poisson.cpp:158

sirius::Potential::hartree_potential_
std::unique_ptr< Periodic_function< double > > hartree_potential_
Hartree potential.
Definition: potential.hpp:55

sirius::Potential::unit_cell_
Unit_cell & unit_cell_
Alias to unit cell.
Definition: potential.hpp:49

sirius::Potential::vh_el_
sddk::mdarray< double, 1 > vh_el_
Electronic part of Hartree potential.
Definition: potential.hpp:101

sirius::Potential::poisson_vmt
auto poisson_vmt(Periodic_function< double > const &rho__) const
Compute MT part of the potential and MT multipole moments.
Definition: potential.hpp:160

sirius::Potential::sbessel_mom_
sddk::mdarray< double, 3 > sbessel_mom_
Moments of the spherical Bessel functions.
Definition: potential.hpp:79

sirius::Potential::poisson_add_pseudo_pw
void poisson_add_pseudo_pw(sddk::mdarray< std::complex< double >, 2 > &qmt__, sddk::mdarray< std::complex< double >, 2 > &qit__, std::complex< double > *rho_pw__)
Add contribution from the pseudocharge to the plane-wave expansion.
Definition: poisson.cpp:49

sirius::SHT::convert
static void convert(int lmax__, double const *f_rlm__, std::complex< double > *f_ylm__)
Convert from Rlm to Ylm representation.
Definition: sht.hpp:264

sirius::Simulation_context::gvec
auto const & gvec() const
Return const reference to Gvec object.
Definition: simulation_context.hpp:404

sirius::Simulation_context::generate_phase_factors
void generate_phase_factors(int iat__, sddk::mdarray< std::complex< double >, 2 > &phase_factors__) const
Generate phase factors $e^{i \mathbf{G} \cdot \mathbf{r}_{\alpha}}$ for all atoms of a given type.
Definition: simulation_context.cpp:1179

sirius::Simulation_context::out
std::ostream & out() const
Return output stream.
Definition: simulation_context.hpp:735

sirius::Simulation_context::comm
mpi::Communicator const & comm() const
Total communicator of the simulation.
Definition: simulation_context.hpp:453

sirius::Spline< double >

sirius::Spline::interpolate
Spline< T, U > & interpolate()
Definition: spline.hpp:275

sirius::Unit_cell::omega
double omega() const
Unit cell volume.
Definition: unit_cell.hpp:275

sirius::Unit_cell::max_num_mt_points
int max_num_mt_points() const
Maximum number of muffin-tin points among all atom types.
Definition: unit_cell.hpp:397

sirius::Unit_cell::atom
Atom const & atom(int id__) const
Return const atom instance by id.
Definition: unit_cell.hpp:344

sirius::Unit_cell::num_atom_types
int num_atom_types() const
Number of atom types.
Definition: unit_cell.hpp:281

sirius::Unit_cell::atom_type
Atom_type & atom_type(int id__)
Return atom type instance by id.
Definition: unit_cell.hpp:288

sirius::Unit_cell::num_atoms
int num_atoms() const
Number of atoms in the unit cell.
Definition: unit_cell.hpp:338

sirius::la::wrap
Definition: linalg.hpp:62

sirius::la::wrap::gemm
void gemm(char transa, char transb, ftn_int m, ftn_int n, ftn_int k, T const *alpha, T const *A, ftn_int lda, T const *B, ftn_int ldb, T const *beta, T *C, ftn_int ldc, acc::stream_id sid=acc::stream_id(-1)) const
General matrix-matrix multiplication.

sirius::mpi::Communicator::allgather
void allgather(T *buffer__, int const *recvcounts__, int const *displs__) const
In-place MPI_Allgatherv.
Definition: communicator.hpp:522

sirius::mpi::Communicator::allreduce
void allreduce(T *buffer__, int count__) const
Perform the in-place (the output buffer is used as the input buffer) all-to-all reduction.
Definition: communicator.hpp:470

sirius::mpi::Communicator::rank
int rank() const
Rank of MPI process inside communicator.
Definition: communicator.hpp:393

sirius::sddk::mdarray
Multidimensional array with the column-major (Fortran) order.
Definition: memory.hpp:660

sirius::sddk::mdarray::copy_to
void copy_to(memory_t mem__, size_t idx0__, size_t n__, acc::stream_id sid=acc::stream_id(-1))
Copy n elements starting from idx0 from one memory type to another.
Definition: memory.hpp:1339

sirius::sddk::mdarray::ld
uint32_t ld() const
Return leading dimension size.
Definition: memory.hpp:1233

sirius::sddk::mdarray::allocate
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
Definition: memory.hpp:1057

sirius::la::lib_t::blas
@ blas
CPU BLAS.

sirius::la::lib_t::gpublas
@ gpublas
GPU BLAS (cuBlas or ROCblas)

sirius::sf::lmmax
int lmmax(int lmax)
Maximum number of $\ell, m$ combinations for a given $\ell_{max}$.
Definition: specfunc.hpp:44

sirius::sf::lm
int lm(int l, int m)
Get composite lm index by angular index l and azimuthal index m.
Definition: specfunc.hpp:50

sirius
Namespace of the SIRIUS library.
Definition: sirius.f90:5

sirius::y00
const double y00
First spherical harmonic $Y_{00} = \frac{1}{\sqrt{4\pi}}$.
Definition: constants.hpp:51

sirius::index_domain_t::local
@ local
Local index.

sirius::twopi
const double twopi
Definition: constants.hpp:45

sirius::fourpi
const double fourpi
Definition: constants.hpp:48

sirius::conj
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
Definition: math_tools.hpp:165

sirius::sum_fg_fl_yg
auto sum_fg_fl_yg(Simulation_context const &ctx__, int lmax__, std::complex< double > const *fpw__, sddk::mdarray< double, 3 > &fl__, sddk::matrix< std::complex< double > > &gvec_ylm__)
Definition: sum_fg_fl_yg.hpp:39

potential.hpp
Contains declaration and partial implementation of sirius::Potential class.

profiler.hpp
A time-based profiler.

sirius::la::constant
Definition: linalg_base.hpp:42

sum_fg_fl_yg.hpp
LAPW specific function to compute sum over plane-wave coefficients and spherical harmonics.