d9/dbe/multi__cg_8hpp_source.html

// Copyright (c) 2018-2022 Harmen Stoppels, Anton Kozhevnikov

// All rights reserved.

//

// Redistribution and use in source and binary forms, with or without modification, are permitted provided that

// the following conditions are met:

//

// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the

//    following disclaimer.

// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions

//    and the following disclaimer in the documentation and/or other materials provided with the distribution.

//

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED

// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A

// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR

// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER

// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR

// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


/** \file multi_cg.hpp

 *

 *  \brief Linear response functionality.

 */


#ifndef __MULTI_CG_HPP__

#define __MULTI_CG_HPP__


#include <algorithm>
#include <cmath>
#include <complex>
#include <iostream>
#include <numeric>
#include <utility>
#include <vector>
#include "core/wf/wave_functions.hpp"
#include "hamiltonian/residuals.hpp"
#include "hamiltonian/hamiltonian.hpp"
#include "hamiltonian/non_local_operator.hpp"
#include "k_point/k_point.hpp"


namespace sirius {

/// Conjugate-gradient solver.

namespace cg {


/// Gather selected entries of a vector to its front, in place.
/** For every position k of \p ids the element data[ids[k]] is copied to data[k]. The size of \p data is
 *  left unchanged, so entries at positions >= ids.size() keep their previous content.
 *
 *  The copy runs front to back inside the same vector: a source position that is smaller than its
 *  destination has already been overwritten by the time it is read.
 *
 *  \param [in,out] data  Vector whose leading entries are overwritten.
 *  \param [in]     ids   Source position for each destination slot 0, 1, 2, ...
 */
template <class T>
void
repack(std::vector<T>& data, std::vector<int> const& ids)
{
    size_t dst{0};
    for (int const src : ids) {
        data[dst] = data[src];
        ++dst;
    }
}


/// Preconditioned conjugate-gradient iteration for several right-hand sides at once.
/** All columns of the block system A * X = B are iterated together, each with its own step lengths.
 *  In every iteration the quantity rho_i = (P r_i, r_i) is evaluated for the active columns; a column
 *  whose |rho_i| is not larger than tol^2 is dropped from the active set. The active columns of U, C and R
 *  are kept contiguous at the front by repacking, and A and P are asked to repack their per-column data
 *  in the same way. X keeps its original column order and is updated through an index map.
 *
 *  Side effects visible to the caller:
 *   - \p B is used in place as the residual block R, so its content is destroyed (and repacked);
 *   - \p U is zeroed on entry; \p U and \p C are used as scratch space;
 *   - repack() is called on \p A and \p P and they are left in the repacked state;
 *   - when sddk::is_device_memory(P.mem) is true, P.eigvals is copied to the device after each repack.
 *
 *  There is no error reporting: if \p maxiters is reached the loop simply ends and the caller has to
 *  inspect the returned residual history.
 *
 *  \param [in,out] A  Operator; must provide multiply(alpha, x, beta, y, ncols) and repack(ids).
 *  \param [in,out] P  Preconditioner; must provide apply(out, in), repack(ids) and the members mem and eigvals.
 *  \param [in,out] X  Initial guess on entry, updated solution on exit.
 *  \param [in,out] B  Right-hand side on entry; overwritten with residual data.
 *  \param [out]    U  Work space holding the search directions.
 *  \param [out]    C  Work space holding P * R and afterwards A * U.
 *  \param [in] maxiters  Upper bound on the number of loop iterations.
 *  \param [in] tol  A column counts as converged once |(P r, r)| <= tol * tol.
 *  \param [in] initial_guess_is_zero  When true the initial product R = B - A * X is skipped.
 *  \return Unnamed struct with the members
 *          - residual_history: for each original column, sqrt(|rho|) recorded in every iteration in which
 *            the column was still active when the residuals were evaluated;
 *          - niter: zero-based index of the last iteration that was started;
 *          - niter_eff: sum over iterations of (number of unconverged columns) / (number of columns),
 *            truncated to int.
 */
template<typename Matrix, typename Prec, typename StateVec>
auto
multi_cg(Matrix &A, Prec &P, StateVec &X, StateVec &B, StateVec &U, StateVec &C,
    int maxiters = 10, double tol = 1e-3, bool initial_guess_is_zero = false) {

    PROFILE("sirius::multi_cg");

    auto const n = X.cols();

    U.zero();

    // Use R for residual, we modify the right-hand side B in-place.
    auto &R = B;

    // Use B effectively as the residual block-vector
    // R = B - A * X -- don't multiply when initial guess is zero.
    if (!initial_guess_is_zero) {
        A.multiply(-1.0, X, 1.0, R, n);
    }

    auto rhos = std::vector<typename StateVec::value_type>(n);
    auto rhos_old = rhos;
    auto sigmas = rhos;
    auto alphas = rhos;

    // When vectors converge we move them to the front, but we can't really do
    // that with X, so we have to keep track of where is what:
    // ids[i] is the original column index of the i-th active column.
    auto ids = std::vector<int>(n);
    std::iota(ids.begin(), ids.end(), 0);

    size_t num_unconverged = n;

    double niter_eff{0};

    auto residual_history = std::vector<std::vector<typename StateVec::value_type>>(n);
    int niter{0};
    for (int iter = 0; iter < maxiters; ++iter) {
        niter = iter;

        // Check the residual norms in the P-norm
        // that means whenever P is approximately inv(A)
        // since (r, Pr) = (Ae, PAe) ~= (e, Ae)
        // we check the errors roughly in the A-norm.
        // When P = I, we just check the residual norm.

        // C = P * R.
        P.apply(C, R);

        rhos_old = rhos;

        // rhos = dot(C, R)
        C.block_dot(R, rhos, num_unconverged);

        for (size_t i = 0; i < num_unconverged; ++i) {
            residual_history[ids[i]].push_back(std::sqrt(std::abs(rhos[i])));
        }

        // Positions (within the current active set) of the columns that still need work.
        auto not_converged = std::vector<int>{};
        for (size_t i = 0; i < num_unconverged; ++i) {
            if (std::abs(rhos[i]) > tol * tol) {
                not_converged.push_back(static_cast<int>(i));
            }
        }

        num_unconverged = not_converged.size();

        // Guard against an empty block: 0.0 / 0 is NaN, and converting NaN to int for the
        // returned niter_eff would be undefined behaviour.
        if (n > 0) {
            niter_eff += static_cast<double>(num_unconverged) / n;
        }

        if (not_converged.empty()) {
            break;
        }

        // Move everything contiguously to the front,
        // except for X, since that's updated in-place.
        repack(ids, not_converged); // use repack on 1-D vector
        repack(rhos, not_converged);
        repack(rhos_old, not_converged);

        U.repack(not_converged); // use repack from the Wave_functions_wrap
        C.repack(not_converged);
        R.repack(not_converged);

        A.repack(not_converged); // use repack from the Linear_response_operator
        P.repack(not_converged); // use repack from the preconditioner

        /* The repack on A and P changes the eigenvalue vectors of A and P respectively */
        /* The eigenvalues of the Linear_response_operator A are sent to device when needed */
        /* Update P.eigvals on device here */
        if (sddk::is_device_memory(P.mem)) {
            P.eigvals.copy_to(sddk::memory_t::device);
        }

        // In the first iteration we have U == 0, so no need for an axpy.
        if (iter == 0) {
            U.copy(C, num_unconverged);
        } else {
            for (size_t i = 0; i < num_unconverged; ++i) {
                alphas[i] = rhos[i] / rhos_old[i];
            }

            // U[:, i] = C[:, i] + alpha[i] * U[:, i] for i < num_unconverged
            U.block_xpby(C, alphas, num_unconverged);
        }

        // C = A * U.
        A.multiply(1.0, U, 0.0, C, num_unconverged);

        // compute the optimal distance for the search direction
        // sigmas = dot(U, C)
        U.block_dot(C, sigmas, num_unconverged);

        // Update the solution and the residual
        for (size_t i = 0; i < num_unconverged; ++i) {
            alphas[i] = rhos[i] / sigmas[i];
        }

        // X[:, ids[i]] += alpha[i] * U[:, i]
        X.block_axpy_scatter(alphas, U, ids, num_unconverged);

        // Flip the sign so that the same coefficients can be used to update the residual.
        for (size_t i = 0; i < num_unconverged; ++i) {
            alphas[i] *= -1;
        }

        // R[:, i] += alpha[i] * C[:, i] for i < num_unconverged
        R.block_axpy(alphas, C, num_unconverged);
    }

    // The history is not needed locally any more: hand it over instead of deep-copying it.
    struct {
        std::vector<std::vector<typename StateVec::value_type>> residual_history;
        int niter;
        int niter_eff;
    } result{std::move(residual_history), niter, static_cast<int>(niter_eff)};
    return result;
}

}


/// Linear response functions and objects.

namespace lr {


/// Adaptor giving a wf::Wave_functions<double> object the block-vector member functions
/// (zero, cols, block_dot, repack, copy, block_xpby, block_axpy_scatter, block_axpy) that
/// cg::multi_cg() calls on its StateVec arguments.
/** Every operation here addresses spin index / spin range 0 only. */
struct Wave_functions_wrap {
    /// Wrapped wave-functions. Only dereferenced here, never allocated or released.
    wf::Wave_functions<double> *x;
    /// Memory type forwarded to every wf:: call below.
    sddk::memory_t mem;

    /// Scalar type of the per-column coefficients and inner products.
    using value_type = std::complex<double>;

    /// Zero the wrapped wave-functions (forwards to Wave_functions::zero with the stored memory type).
    void zero()
    {
        x->zero(mem);
    }

    /// Number of wave-functions, i.e. columns of the block-vector.
    int cols() const
    {
        return x->num_wf().get();
    }

    /// Replace \p rhos__ by the result of wf::inner_diag(*this, y__) over the first \p N__ bands.
    /** Presumably one inner product per column; confirm against wf::inner_diag. */
    void block_dot(Wave_functions_wrap const& y__, std::vector<value_type>& rhos__, size_t N__)
    {
        rhos__ = wf::inner_diag<double, value_type>(mem, *x, *y__.x, wf::spin_range(0),
                wf::num_bands(N__));
    }

    /// Copy band ids__[k] into band k for k = 0, 1, ..., skipping bands that are already in place.
    void repack(std::vector<int> const& ids__)
    {
        PROFILE("sirius::Wave_functions_wrap::repack");
        int const count = static_cast<int>(ids__.size());
        for (int dst = 0; dst < count; ++dst) {
            int const src = ids__[dst];
            if (src == dst) {
                continue; // nothing to move
            }
            wf::copy(mem, *x, wf::spin_index(0), wf::band_range(src, src + 1),
                    *x, wf::spin_index(0), wf::band_range(dst, dst + 1));
        }
    }

    /// this[:, 0:N__] <- y__[:, 0:N__]
    void copy(Wave_functions_wrap const &y__, size_t N__)
    {
        auto const active = wf::band_range(0, N__);
        wf::copy(mem, *y__.x, wf::spin_index(0), active,
                    *x, wf::spin_index(0), active);
    }

    /// this[:, i] <- y__[:, i] + alphas[i] * this[:, i] for i < N__
    void block_xpby(Wave_functions_wrap const &y__, std::vector<value_type> const &alphas, int N__)
    {
        // wf::axpby needs one coefficient per band for both operands; y__ gets a factor of one.
        std::vector<value_type> unit(N__, 1.0);
        auto const active = wf::band_range(0, N__);
        wf::axpby(mem, wf::spin_range(0), active, unit.data(), y__.x, alphas.data(), x);
    }

    /// Forward to wf::axpy_scatter for the first \p n__ columns of \p y__.
    /** According to the call site in cg::multi_cg() this performs
     *  this[:, idx__[i]] += alphas__[i] * y__[:, i]; confirm against wf::axpy_scatter. */
    void block_axpy_scatter(std::vector<value_type> const& alphas__, Wave_functions_wrap const &y__,
            std::vector<int> const &idx__, int n__)
    {
        auto const *coeffs  = alphas__.data();
        auto const *targets = idx__.data();
        wf::axpy_scatter<double, value_type, int>(mem, wf::spin_range(0), coeffs, y__.x, targets, x, n__);
    }

    /// this[:, i] <- alphas__[i] * y__[:, i] + this[:, i] for i < N__
    void block_axpy(std::vector<value_type> const &alphas__, Wave_functions_wrap const &y__, int N__)
    {
        // Factor of one for the accumulated operand (this).
        std::vector<value_type> unit(N__, 1.0);
        auto const active = wf::band_range(0, N__);
        wf::axpby(mem, wf::spin_range(0), active, alphas__.data(), y__.x, unit.data(), x);
    }
};


/// Preconditioner that does nothing: applying it just copies the input block.
/** NOTE(review): cg::multi_cg() reads P.mem and P.eigvals after repacking; this type declares
 *  neither, so it does not look usable with multi_cg() as written -- confirm. */
struct Identity_preconditioner {
    /// Number of leading columns that apply() copies.
    size_t num_active;

    /// x[:, 0:num_active] <- y[:, 0:num_active]
    void apply(Wave_functions_wrap &x, Wave_functions_wrap const &y)
    {
        auto const ncols = this->num_active;
        x.copy(y, ncols);
    }

    /// Only the number of active columns has to be tracked; there is no per-column data to move.
    void repack(std::vector<int> const &ids)
    {
        this->num_active = ids.size();
    }
};


/// Preconditioner that forwards to sirius::apply_preconditioner() with stored diagonals and eigenvalues.
struct Smoothed_diagonal_preconditioner {
    /// Passed as the h_diag argument of apply_preconditioner (presumably the diagonal of H).
    sddk::mdarray<double, 2> H_diag;
    /// Passed as the o_diag argument of apply_preconditioner (presumably the diagonal of S).
    sddk::mdarray<double, 2> S_diag;
    /// One value per active column, passed as the eval argument; compacted by repack().
    sddk::mdarray<double, 1> eigvals;
    /// Number of leading columns that are processed.
    int num_active;
    /// Memory type handed to apply_preconditioner.
    sddk::memory_t mem;
    /// Spin range handed to apply_preconditioner.
    wf::spin_range sr;

    /// x[:, 0:num_active] <- preconditioner applied to y[:, 0:num_active]
    void apply(Wave_functions_wrap &x, Wave_functions_wrap const &y)
    {
        // apply_preconditioner works in place, hence the input is first copied into the output.
        x.copy(y, num_active);

        auto &target = *x.x;
        sirius::apply_preconditioner(mem, sr, wf::num_bands(num_active), target, H_diag, S_diag, eigvals);
    }

    /// Keep only the eigenvalues of the columns listed in \p ids, moved to the front.
    /** Only the data reachable through eigvals[...] is changed here; cg::multi_cg() copies eigvals to the
     *  device afterwards when mem is device memory. */
    void repack(std::vector<int> const &ids)
    {
        num_active = ids.size();

        size_t dst{0};
        for (int const src : ids) {
            eigvals[dst] = eigvals[src];
            ++dst;
        }
    }
};


struct Linear_response_operator {

    sirius::Simulation_context &ctx;

    sirius::Hamiltonian_k<double> &Hk;

    std::vector<double> min_eigenvals;

    wf::Wave_functions<double> * Hphi;

    wf::Wave_functions<double> * Sphi;

    wf::Wave_functions<double> * evq;

    wf::Wave_functions<double> * tmp;

    double alpha_pv;

    wf::band_range br;

    wf::spin_range sr;

    sddk::memory_t mem;

    la::dmatrix<std::complex<double>> overlap;


    Linear_response_operator(

        sirius::Simulation_context &ctx,

        sirius::Hamiltonian_k<double> & Hk,

        std::vector<double> const &eigvals,

        wf::Wave_functions<double> * Hphi,

        wf::Wave_functions<double> * Sphi,

        wf::Wave_functions<double> * evq,

        wf::Wave_functions<double> * tmp,

        double alpha_pv,

        wf::band_range br,

        wf::spin_range sr,

        sddk::memory_t mem)

    : ctx(ctx), Hk(Hk), min_eigenvals(eigvals), Hphi(Hphi), Sphi(Sphi), evq(evq), tmp(tmp),

      alpha_pv(alpha_pv), br(br), sr(sr), mem(mem), overlap(br.size(), br.size())

    {

        // I think we could just compute alpha_pv here by just making it big enough

        // s.t. the operator H - e * S + alpha_pv * Q is positive, e.g:

        // alpha_pv = 2 * min_eigenvals.back();

        // but QE has a very specific way to compute it, so we just forward it from

        // there.;


        // flip the sign of the eigenvals so that the axpby works

        for (auto &e : min_eigenvals) {

            e *= -1;

        }

    }


    void repack(std::vector<int> const &ids) {

        for (size_t i = 0; i < ids.size(); ++i) {

            min_eigenvals[i] = min_eigenvals[ids[i]];

        }

    }


    // y[:, i] <- alpha * A * x[:, i] + beta * y[:, i] where A = (H - e_j S + constant   * SQ * SQ')

    // where SQ is S * eigenvectors.

    void multiply(double alpha, Wave_functions_wrap x, double beta, Wave_functions_wrap y, int num_active) {

        PROFILE("sirius::Linear_response_operator::multiply");

        // Hphi = H * x, Sphi = S * x

        Hk.apply_h_s<std::complex<double>>(

            sr,

            wf::band_range(0, num_active),

            *x.x,

            Hphi,

            Sphi

        );


        std::vector<double> ones(num_active, 1.0);


        // effectively tmp := (H - e * S) * x, as an axpy, modifying Hphi.

        wf::axpby(mem, wf::spin_range(0), wf::band_range(0, num_active),

                min_eigenvals.data(), Sphi, ones.data(), Hphi);

        wf::copy(mem, *Hphi, wf::spin_index(0), wf::band_range(0, num_active), *tmp,

                wf::spin_index(0), wf::band_range(0, num_active));


        // Projector, add alpha_pv * (S * (evq * (evq' * (S * x))))


        // overlap := evq' * (S * x)

        wf::inner(ctx.spla_context(), mem, wf::spin_range(0), *evq, br, *Sphi, wf::band_range(0, num_active),

                overlap, 0, 0);


        // Hphi := evq * overlap

        wf::transform(

            ctx.spla_context(),

            mem,

            overlap, 0, 0,

            1.0, *evq, wf::spin_index(0), br,

            0.0, *Hphi, wf::spin_index(0), wf::band_range(0, num_active));


        Hk.apply_s<std::complex<double>>(wf::spin_range(0), wf::band_range(0, num_active), *Hphi, *Sphi);


        // tmp := alpha_pv * Sphi + tmp = (H - e * S) * x + alpha_pv * (S * (evq * (evq' * (S * x))))

        std::vector<double> alpha_pvs(num_active, alpha_pv);

        wf::axpby(mem, wf::spin_range(0), wf::band_range(0, num_active),

                alpha_pvs.data(), Sphi, ones.data(), tmp);

        // y[:, i] <- alpha * tmp + beta * y[:, i]

        std::vector<double> alphas(num_active, alpha);

        std::vector<double> betas(num_active, beta);

        wf::axpby(mem, wf::spin_range(0), wf::band_range(0, num_active),

                alphas.data(), tmp, betas.data(), y.x);

    }

};


}


}

#endif

sirius::Hamiltonian_k< double >

sirius::Hamiltonian_k::apply_h_s
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > apply_h_s(wf::spin_range spins__, wf::band_range br__, wf::Wave_functions< T > const &phi__, wf::Wave_functions< T > *hphi__, wf::Wave_functions< T > *sphi__) const
Apply pseudopotential H and S operators to the wavefunctions.
Definition: hamiltonian.hpp:439

sirius::Hamiltonian_k::apply_s
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > apply_s(wf::spin_range spin__, wf::band_range br__, wf::Wave_functions< T > const &phi__, wf::Wave_functions< T > &sphi__) const
apply S operator
Definition: hamiltonian.hpp:516

sirius::Matrix
Definition: adaptor.hpp:41

sirius::Simulation_context
Simulation context is a set of parameters and objects describing a single simulation.
Definition: simulation_context.hpp:183

sirius::la::dmatrix
Distributed matrix.
Definition: dmatrix.hpp:56

sirius::sddk::mdarray< double, 2 >

sirius::strong_type< int, struct __num_bands_tag >

sirius::wf::Wave_functions_base::num_wf
auto num_wf() const
Return number of wave-functions.
Definition: wave_functions.hpp:395

sirius::wf::Wave_functions_base::zero
void zero(sddk::memory_t mem__, spin_index s__, band_range br__)
Zero a spin component of the wave-functions in a band range.
Definition: wave_functions.hpp:419

sirius::wf::Wave_functions< double >

sirius::wf::band_range
Describe a range of bands.
Definition: wave_functions.hpp:130

sirius::wf::spin_range
Describe a range of spins.
Definition: wave_functions.hpp:170

hamiltonian.hpp
Contains declaration and definition of sirius::Hamiltonian class.

k_point.hpp
Contains definition of sirius::K_point class.

sirius::sddk::memory_t
memory_t
Memory types where the code can store data.
Definition: memory.hpp:71

sirius::wf::axpby
void axpby(sddk::memory_t mem__, wf::spin_range spins__, wf::band_range br__, F const *alpha__, wf::Wave_functions< T > const *x__, F const *beta__, wf::Wave_functions< T > *y__)
Perform y <- a * x + b * y type of operation.
Definition: wave_functions.hpp:1323

sirius::wf::inner
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > inner(::spla::Context &spla_ctx__, sddk::memory_t mem__, spin_range spins__, W const &wf_i__, band_range br_i__, Wave_functions< T > const &wf_j__, band_range br_j__, la::dmatrix< F > &result__, int irow0__, int jcol0__)
Compute inner product between the two sets of wave-functions.
Definition: wave_functions.hpp:1622

sirius::wf::transform
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > transform(::spla::Context &spla_ctx__, sddk::memory_t mem__, la::dmatrix< F > const &M__, int irow0__, int jcol0__, real_type< F > alpha__, Wave_functions< T > const &wf_in__, spin_index s_in__, band_range br_in__, real_type< F > beta__, Wave_functions< T > &wf_out__, spin_index s_out__, band_range br_out__)
Apply linear transformation to the wave-functions.
Definition: wave_functions.hpp:1490

sirius::wf::copy
void copy(sddk::memory_t mem__, Wave_functions< T > const &in__, wf::spin_index s_in__, wf::band_range br_in__, Wave_functions< F > &out__, wf::spin_index s_out__, wf::band_range br_out__)
Copy wave-functions.
Definition: wave_functions.hpp:1451

sirius
Namespace of the SIRIUS library.
Definition: sirius.f90:5

sirius::apply_preconditioner
void apply_preconditioner(sddk::memory_t mem__, wf::spin_range spins__, wf::num_bands num_bands__, wf::Wave_functions< T > &res__, sddk::mdarray< T, 2 > const &h_diag__, sddk::mdarray< T, 2 > const &o_diag__, sddk::mdarray< T, 1 > const &eval__)
Apply preconditioner to the residuals.
Definition: residuals.hpp:166

non_local_operator.hpp
Contains declaration of sirius::Non_local_operator class.

residuals.hpp
Compute residuals from the eigen-vectors and basis functions.

sirius::lr::Identity_preconditioner
Definition: multi_cg.hpp:246

sirius::lr::Linear_response_operator
Definition: multi_cg.hpp:287

sirius::lr::Smoothed_diagonal_preconditioner
Definition: multi_cg.hpp:258

sirius::lr::Wave_functions_wrap
Definition: multi_cg.hpp:188

wave_functions.hpp
Contains declaration and implementation of Wave_functions class.