SIRIUS 7.5.0
Electronic structure library and applications
occupation_matrix.cpp
// Copyright (c) 2013-2022 Mathieu Taillefumier, Anton Kozhevnikov, Thomas Schulthess
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
// the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
//    following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
//    and the following disclaimer in the documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

/** \file occupation_matrix.cpp
 *
 * \brief Occupation matrix of the LDA+U method.
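 *
 * \details The quantity accumulated here (see the inline comments in add_k_point_contribution()) is
 * \f[
 *     n^{\sigma \sigma'}_{m m'} = \sum_{n {\bf k}} f_{n {\bf k}}
 *     \langle \psi_{n {\bf k}} | \phi_{m}^{\sigma} \rangle
 *     \langle \phi_{m'}^{\sigma'} | \psi_{n {\bf k}} \rangle
 * \f]
 * where \f$ |\phi_m\rangle \f$ are the localized Hubbard orbitals and \f$ f_{n {\bf k}} \f$ are the
 * band occupancies.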
 */

#include "occupation_matrix.hpp"
#include "symmetry/crystal_symmetry.hpp"

namespace sirius {

Occupation_matrix::Occupation_matrix(Simulation_context& ctx__)
    : Hubbard_matrix(ctx__)
{
    if (!ctx_.hubbard_correction()) {
        return;
    }
    /* a pair of "total number, offsets" used for indexing the Hubbard orbitals */
    int nhwf = ctx_.unit_cell().num_hubbard_wf().first;

    /* find all possible translations */
    for (int i = 0; i < ctx_.cfg().hubbard().nonlocal().size(); i++) {
        auto nl = ctx_.cfg().hubbard().nonlocal(i);
        int ia  = nl.atom_pair()[0];
        int ja  = nl.atom_pair()[1];
        auto T  = nl.T();

        auto& sym = ctx_.unit_cell().symmetry();

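        /* each symmetry operation maps the atom pair (ia, ja) with translation T onto an
           equivalent pair; Ttot is the translation of that mapped pair. One occupation-matrix
           block is allocated per distinct translation, to be filled later in
           add_k_point_contribution() */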
        for (int isym = 0; isym < sym.size(); isym++) {
            auto Ttot = sym[isym].spg_op.inv_sym_atom_T[ja] - sym[isym].spg_op.inv_sym_atom_T[ia] +
                        dot(sym[isym].spg_op.invR, r3::vector<int>(T));
            if (!occ_mtrx_T_.count(Ttot)) {
                occ_mtrx_T_[Ttot] = sddk::mdarray<std::complex<double>, 3>(nhwf, nhwf, ctx_.num_mag_comp());
                occ_mtrx_T_[Ttot].zero();
            }
        }
    }
}

template <typename T>
void
Occupation_matrix::add_k_point_contribution(K_point<T>& kp__)
{
    if (!ctx_.hubbard_correction()) {
        return;
    }

    PROFILE("sirius::Occupation_matrix::add_k_point_contribution");

    sddk::memory_t mem_host{sddk::memory_t::host};
    sddk::memory_t mem{sddk::memory_t::host};
    la::lib_t la{la::lib_t::blas};
    if (ctx_.processing_unit() == sddk::device_t::GPU) {
        mem      = sddk::memory_t::device;
        mem_host = sddk::memory_t::host_pinned;
        la       = la::lib_t::gpublas;
    }

    /* a pair of "total number, offsets" used for indexing the Hubbard orbitals */
    auto r = ctx_.unit_cell().num_hubbard_wf();

    int nwfu = r.first;

    sddk::matrix<std::complex<T>> occ_mtrx(nwfu, nwfu, get_memory_pool(sddk::memory_t::host), "occ_mtrx");
    if (is_device_memory(mem)) {
        occ_mtrx.allocate(get_memory_pool(mem));
    }

    // TODO: the collinear and non-collinear cases share a lot of code; there should be a way to combine it

    /* full non-collinear magnetism */
    if (ctx_.num_mag_dims() == 3) {
        la::dmatrix<std::complex<T>> dm(kp__.num_occupied_bands(), nwfu, get_memory_pool(mem_host), "dm");
        if (is_device_memory(mem)) {
            dm.allocate(get_memory_pool(mem));
        }
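        /* compute dm(j, m) = <psi_{j k} | S phi_m>, where |phi_m> are the Hubbard orbitals
           (hubbard_wave_functions_S() stores the orbitals with the overlap operator applied) */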
        wf::inner(ctx_.spla_context(), mem, wf::spin_range(0, 2), kp__.spinor_wave_functions(),
                  wf::band_range(0, kp__.num_occupied_bands()), kp__.hubbard_wave_functions_S(),
                  wf::band_range(0, nwfu), dm, 0, 0);

        la::dmatrix<std::complex<T>> dm1(kp__.num_occupied_bands(), nwfu, get_memory_pool(mem_host), "dm1");
        #pragma omp parallel for
        for (int m = 0; m < nwfu; m++) {
            for (int j = 0; j < kp__.num_occupied_bands(); j++) {
                dm1(j, m) = dm(j, m) * static_cast<T>(kp__.band_occupancy(j, 0));
            }
        }
        if (is_device_memory(mem)) {
            dm1.allocate(get_memory_pool(mem)).copy_to(mem);
        }

        /* now compute O_{ij}^{sigma,sigma'} = \sum_{nk} <psi_{nk}|phi_{i,sigma}> <phi_{j,sigma'}|psi_{nk}> f_{nk} */
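        /* note: unlike the collinear branch below, no 1/max_occupancy factor is needed here; for
           num_mag_dims() == 3 the band occupancies do not carry the spin-degeneracy factor 2 */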
        auto alpha = std::complex<T>(kp__.weight(), 0.0);
        la::wrap(la).gemm('C', 'N', nwfu, nwfu, kp__.num_occupied_bands(), &alpha, dm.at(mem), dm.ld(), dm1.at(mem),
                          dm1.ld(), &la::constant<std::complex<T>>::zero(), occ_mtrx.at(mem), occ_mtrx.ld());
        if (is_device_memory(mem)) {
            occ_mtrx.copy_to(sddk::memory_t::host);
        }

        #pragma omp parallel for schedule(static)
        for (int at_lvl = 0; at_lvl < static_cast<int>(local_.size()); at_lvl++) {
            const int ia     = atomic_orbitals_[at_lvl].first;
            auto const& atom = ctx_.unit_cell().atom(ia);
            // we can skip this atomic level if it does not contribute to the Hubbard correction (or U = 0)
            if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).use_for_calculation()) {

                /* loop over the different channels */
                /* note that for atoms with SO interaction, we need to jump
                   by 2 instead of 1. This is because the relativistic wave
                   functions have different total angular momenta for the same n */
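                /* mapping of the spin-block indices (s1, s2) to the storage index of local_:
                   (0,0) -> 0 (up-up), (1,1) -> 1 (down-down), (1,0) -> 2, (0,1) -> 3 (off-diagonal) */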
                int s_idx[2][2]    = {{0, 3}, {2, 1}};
                const int lmmax_at = 2 * atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).l() + 1;
                for (int s1 = 0; s1 < ctx_.num_spins(); s1++) {
                    for (int s2 = 0; s2 < ctx_.num_spins(); s2++) {
                        for (int mp = 0; mp < lmmax_at; mp++) {
                            for (int m = 0; m < lmmax_at; m++) {
                                local_[at_lvl](m, mp, s_idx[s1][s2]) +=
                                    occ_mtrx(r.first * s1 + offset_[at_lvl] + m, r.first * s2 + offset_[at_lvl] + mp);
                            }
                        }
                    }
                }
            }
        }
    } else {
        /* LSDA+U: we need to do the explicit calculation. The Hubbard
           orbitals only have one spin component while the Bloch wave functions
           have two. The inner product takes care of this case internally. */

        for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
            if (kp__.num_occupied_bands(ispn) == 0) {
                continue;
            }
            la::dmatrix<std::complex<T>> dm(kp__.num_occupied_bands(ispn), nwfu, get_memory_pool(mem_host), "dm");
            if (is_device_memory(mem)) {
                dm.allocate(get_memory_pool(mem));
            }
            /* compute <psi | S phi>, where |phi> are the Hubbard WFs */
            wf::inner(ctx_.spla_context(), mem, wf::spin_range(ispn), kp__.spinor_wave_functions(),
                      wf::band_range(0, kp__.num_occupied_bands(ispn)), kp__.hubbard_wave_functions_S(),
                      wf::band_range(0, nwfu), dm, 0, 0);

            la::dmatrix<std::complex<T>> dm1(kp__.num_occupied_bands(ispn), nwfu, get_memory_pool(mem_host), "dm1");
            #pragma omp parallel for
            for (int m = 0; m < nwfu; m++) {
                for (int j = 0; j < kp__.num_occupied_bands(ispn); j++) {
                    dm1(j, m) = dm(j, m) * static_cast<T>(kp__.band_occupancy(j, ispn));
                }
            }
            if (is_device_memory(mem)) {
                dm1.allocate(get_memory_pool(mem)).copy_to(mem);
            }
            /* now compute O_{ij}^{sigma,sigma'} = \sum_{nk} <psi_{nk}|phi_{i,sigma}> <phi_{j,sigma'}|psi_{nk}> f_{nk} */
            /* We need to apply a factor 1/2 when computing the occupancies for LDA+U, because the
             * expressions for E and U assume occupancies <= 1, while SIRIUS includes a factor 2
             * (spin degeneracy) in the band occupancies. We compensate for it here, since it is
             * already taken into account in the calculation of the Hubbard potential. */
            auto alpha = std::complex<T>(kp__.weight() / ctx_.max_occupancy(), 0.0);
            la::wrap(la).gemm('C', 'N', nwfu, nwfu, kp__.num_occupied_bands(ispn), &alpha, dm.at(mem), dm.ld(),
                              dm1.at(mem), dm1.ld(), &la::constant<std::complex<T>>::zero(), occ_mtrx.at(mem),
                              occ_mtrx.ld());
            if (is_device_memory(mem)) {
                occ_mtrx.copy_to(sddk::memory_t::host);
            }

            #pragma omp parallel for
            for (int at_lvl = 0; at_lvl < static_cast<int>(local_.size()); at_lvl++) {
                const int ia     = atomic_orbitals_[at_lvl].first;
                auto const& atom = ctx_.unit_cell().atom(ia);
                // we can skip this atomic level if it does not contribute to the Hubbard
                // correction (or U = 0)
                if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).use_for_calculation()) {

                    const int lmmax_at = 2 * atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).l() + 1;
                    for (int mp = 0; mp < lmmax_at; mp++) {
                        const int mmp = offset_[at_lvl] + mp;
                        for (int m = 0; m < lmmax_at; m++) {
                            const int mm = offset_[at_lvl] + m;
                            local_[at_lvl](m, mp, ispn) += occ_mtrx(mm, mmp);
                        }
                    }
                }
            }

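            /* accumulate the translation-resolved occupation matrices
               n_{ij}(T) += e^{-i k T} O_{ij}(k) (O already carries the k-point weight);
               they are used to symmetrize the nonlocal (inter-site) Hubbard correction */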
            for (auto& e : this->occ_mtrx_T_) {
                /* phase factor e^{-i k T} */
                auto z1 = std::exp(std::complex<double>(0, -twopi * dot(e.first, kp__.vk())));
                for (int i = 0; i < nwfu; i++) {
                    for (int j = 0; j < nwfu; j++) {
                        /* accumulate in double precision; casting z1 down to std::complex<T>
                           would lose accuracy for T = float */
                        e.second(i, j, ispn) += static_cast<std::complex<double>>(occ_mtrx(i, j)) * z1;
                    }
                }
            }
        } // ispn
    }
}

template void Occupation_matrix::add_k_point_contribution<double>(K_point<double>& kp__);
#ifdef SIRIUS_USE_FP32
template void Occupation_matrix::add_k_point_contribution<float>(K_point<float>& kp__);
#endif

void
Occupation_matrix::init()
{
    if (!ctx_.hubbard_correction()) {
        return;
    }

    this->zero();
    #pragma omp parallel for schedule(static)
    for (int at_lvl = 0; at_lvl < static_cast<int>(local_.size()); at_lvl++) {
        const int ia     = atomic_orbitals_[at_lvl].first;
        auto const& atom = ctx_.unit_cell().atom(ia);

        if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).use_for_calculation()) {

            int il            = atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).l();
            const int lmax_at = 2 * il + 1;
            if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).initial_occupancy().size()) {
                /* the occupancy is specified in the input file */
                for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                    for (int m = 0; m < lmax_at; m++) {
                        this->local_[at_lvl](m, m, ispn) = atom.type()
                                                               .lo_descriptor_hub(atomic_orbitals_[at_lvl].second)
                                                               .initial_occupancy()[m + ispn * lmax_at];
                    }
                }
            } else {
                // compute the total charge of the Hubbard orbitals
                double charge = atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).occupancy();
                bool nm       = true; // true if the atom is non-magnetic
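                /* majs/mins: storage indices of the majority and minority spin channels,
                   determined by the sign of the z component of the initial magnetic moment */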
                int majs, mins;
                if (ctx_.num_spins() != 1) {
                    if (atom.vector_field()[2] > 0.0) {
                        nm   = false;
                        majs = 0;
                        mins = 1;
                    } else if (atom.vector_field()[2] < 0.0) {
                        nm   = false;
                        majs = 1;
                        mins = 0;
                    }
                }

                if (!nm) {
                    if (ctx_.num_mag_dims() != 3) {
                        // collinear case
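                        /* Hund's-rule-like guess: fill each majority-spin orbital first; only the
                           charge that does not fit there spills into the minority channel */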
                        if (charge > (lmax_at)) {
                            for (int m = 0; m < lmax_at; m++) {
                                this->local_[at_lvl](m, m, majs) = 1.0;
                                this->local_[at_lvl](m, m, mins) =
                                    (charge - static_cast<double>(lmax_at)) / static_cast<double>(lmax_at);
                            }
                        } else {
                            for (int m = 0; m < lmax_at; m++) {
                                this->local_[at_lvl](m, m, majs) = charge / static_cast<double>(lmax_at);
                            }
                        }
                    } else {
                        // double c1, s1;
                        // sincos(atom.type().starting_magnetization_theta(), &s1, &c1);
                        double c1 = atom.vector_field()[2];
                        std::complex<double> cs =
                            std::complex<double>(atom.vector_field()[0], atom.vector_field()[1]) / sqrt(1.0 - c1 * c1);
                        std::complex<double> ns[4];

                        if (charge > (lmax_at)) {
                            ns[majs] = 1.0;
                            ns[mins] = (charge - static_cast<double>(lmax_at)) / static_cast<double>(lmax_at);
                        } else {
                            ns[majs] = charge / static_cast<double>(lmax_at);
                            ns[mins] = 0.0;
                        }

                        // charge and moment
                        double nc  = ns[majs].real() + ns[mins].real();
                        double mag = ns[majs].real() - ns[mins].real();

                        // rotate the occ matrix
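                        /* build the 2x2 spin density matrix n = (nc*I + mag*(sigma . m_hat)) / 2 in the
                           global spin frame; c1 plays the role of cos(theta) and cs of e^{i phi} of the
                           initial magnetization direction (components: 0 = uu, 1 = dd, 2/3 = off-diagonal) */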
                        ns[0] = (nc + mag * c1) * 0.5;
                        ns[1] = (nc - mag * c1) * 0.5;
                        ns[2] = mag * std::conj(cs) * 0.5;
                        ns[3] = mag * cs * 0.5;

                        for (int m = 0; m < lmax_at; m++) {
                            this->local_[at_lvl](m, m, 0) = ns[0];
                            this->local_[at_lvl](m, m, 1) = ns[1];
                            this->local_[at_lvl](m, m, 2) = ns[2];
                            this->local_[at_lvl](m, m, 3) = ns[3];
                        }
                    }
                } else {
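                    /* non-magnetic atom: distribute the charge equally between the two spin channels */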
                    for (int s = 0; s < ctx_.num_spins(); s++) {
                        for (int m = 0; m < lmax_at; m++) {
                            this->local_[at_lvl](m, m, s) = charge * 0.5 / static_cast<double>(lmax_at);
                        }
                    }
                }
            }
        }
    }

    print_occupancies(2);
}

void
Occupation_matrix::print_occupancies(int verbosity__) const
{
    if (!ctx_.hubbard_correction()) {
        return;
    }

    if (ctx_.comm().rank() == 0) {
        std::stringstream s;
        /* print local part */
        if (ctx_.verbosity() >= verbosity__) {
            for (int at_lvl = 0; at_lvl < static_cast<int>(local_.size()); at_lvl++) {
                auto const& atom = ctx_.unit_cell().atom(atomic_orbitals_[at_lvl].first);
                int il           = atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).l();
                if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).use_for_calculation()) {
                    Hubbard_matrix::print_local(at_lvl, s);
                    double occ[2] = {0, 0};
                    for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                        for (int m = 0; m < 2 * il + 1; m++) {
                            occ[ispn] += this->local_[at_lvl](m, m, ispn).real();
                        }
                    }
                    if (ctx_.num_spins() == 2) {
                        s << "Atom charge (total) " << occ[0] + occ[1] << " (n_up) " << occ[0] << " (n_down) "
                          << occ[1] << " (mz) " << occ[0] - occ[1] << std::endl;
                    } else {
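                        /* non-magnetic case: multiply by 2 to account for spin degeneracy */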
                        s << "Atom charge (total) " << 2 * occ[0] << std::endl;
                    }
                }
            }
        }
        /* print non-local part */
        if (ctx_.cfg().hubbard().nonlocal().size() && (ctx_.verbosity() >= verbosity__ + 1)) {
            s << std::endl;
            for (int i = 0; i < static_cast<int>(ctx_.cfg().hubbard().nonlocal().size()); i++) {
                Hubbard_matrix::print_nonlocal(i, s);
            }
        }
        if (ctx_.verbosity() >= verbosity__) {
            ctx_.message(1, "occ.mtrx", s);
        }
    }
}

} // namespace sirius