SIRIUS 7.5.0
Electronic structure library and applications
generate_subspace_matrix.hpp
Go to the documentation of this file.
1// Copyright (c) 2013-2023 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file generate_subspace_matrix.hpp
21 *
22 * \brief Generate subspace-matrix in the auxiliary basis |phi>
23 */
24
25#ifndef __GENERATE_SUBSPACE_MATRIX_HPP__
26#define __GENERATE_SUBSPACE_MATRIX_HPP__
27
27#include "context/simulation_context.hpp"
28#include "core/wf/wave_functions.hpp"
29
30
31namespace sirius {
32
33/// Generate subspace matrix for the iterative diagonalization.
34/** Compute \f$ O_{ii'} = \langle \phi_i | \hat O | \phi_{i'} \rangle \f$ operator matrix
35 * for the subspace spanned by the wave-functions \f$ \phi_i \f$. The matrix is always returned
36 * in host (CPU) memory because most of the standard math libraries operate on CPU data.
37 *
38 * \tparam T Precision type of the wave-functions
39 * \tparam F Type of the output subspace matrix.
40 * \param [in] ctx Simulation context
41 * \param [in] N Number of existing (old) states phi.
42 * \param [in] n Number of new states phi.
43 * \param [in] num_locked Number of locked states phi. Locked states are excluded from the subspace basis.
44 * \param [in] phi Subspace basis functions phi.
45 * \param [in] op_phi Operator (H, S or overlap) applied to phi.
46 * \param [out] mtrx New subspace matrix.
47 * \param [inout] mtrx_old Pointer to old subspace matrix. It is used to store and reuse the subspace matrix
48 * of the previous step.
49 * */
50template <typename T, typename F>
51void generate_subspace_matrix(Simulation_context& ctx__, int N__, int n__, int num_locked__,
53 la::dmatrix<F>* mtrx_old__ = nullptr)
54{
55 PROFILE("sirius::generate_subspace_matrix");
56
57 RTE_ASSERT(n__ != 0);
58 if (mtrx_old__ && mtrx_old__->size()) {
59 RTE_ASSERT(&mtrx__.blacs_grid() == &mtrx_old__->blacs_grid());
60 }
61
62 /* copy old N - num_locked x N - num_locked distributed matrix */
63 if (N__ > 0) {
64 splindex_block_cyclic<> spl_row(N__ - num_locked__,
65 n_blocks(mtrx__.blacs_grid().num_ranks_row()), block_id(mtrx__.blacs_grid().rank_row()),
66 mtrx__.bs_row());
67 splindex_block_cyclic<> spl_col(N__ - num_locked__,
68 n_blocks(mtrx__.blacs_grid().num_ranks_col()), block_id(mtrx__.blacs_grid().rank_col()),
69 mtrx__.bs_col());
70
71 if (mtrx_old__) {
72 if (spl_row.local_size()) {
73 #pragma omp parallel for schedule(static)
74 for (int i = 0; i < spl_col.local_size(); i++) {
75 std::copy(&(*mtrx_old__)(0, i), &(*mtrx_old__)(0, i) + spl_row.local_size(), &mtrx__(0, i));
76 }
77 }
78 }
79
80 if (env::print_checksum()) {
81 auto cs = mtrx__.checksum(N__ - num_locked__, N__ - num_locked__);
82 if (ctx__.comm_band().rank() == 0) {
83 print_checksum("subspace_mtrx_old", cs, RTE_OUT(std::cout));
84 }
85 }
86 }
87
88 /* [--- num_locked -- | ------ N - num_locked ---- | ---- n ----] */
89 /* [ ------------------- N ------------------------| ---- n ----] */
90
91 auto mem = ctx__.processing_unit() == sddk::device_t::CPU ? sddk::memory_t::host : sddk::memory_t::device;
92 /* <{phi,phi_new}|Op|phi_new> */
93 inner(ctx__.spla_context(), mem, ctx__.num_mag_dims() == 3 ? wf::spin_range(0, 2) : wf::spin_range(0), phi__,
94 wf::band_range(num_locked__, N__ + n__), op_phi__, wf::band_range(N__, N__ + n__),
95 mtrx__, 0, N__ - num_locked__);
96
97 /* restore lower part */
98 if (N__ > 0) {
99 if (mtrx__.blacs_grid().comm().size() == 1) {
100 #pragma omp parallel for
101 for (int i = 0; i < N__ - num_locked__; i++) {
102 for (int j = N__ - num_locked__; j < N__ + n__ - num_locked__; j++) {
103 mtrx__(j, i) = conj(mtrx__(i, j));
104 }
105 }
106 } else {
108 .tranc(n__, N__ - num_locked__, mtrx__, 0, N__ - num_locked__, mtrx__, N__ - num_locked__, 0);
109 }
110 }
111
112 if (env::print_checksum()) {
113 splindex_block_cyclic<> spl_row(N__ + n__ - num_locked__,
114 n_blocks(mtrx__.blacs_grid().num_ranks_row()), block_id(mtrx__.blacs_grid().rank_row()),
115 mtrx__.bs_row());
116 splindex_block_cyclic<> spl_col(N__ + n__ - num_locked__,
117 n_blocks(mtrx__.blacs_grid().num_ranks_col()), block_id(mtrx__.blacs_grid().rank_col()),
118 mtrx__.bs_col());
119 auto cs = mtrx__.checksum(N__ + n__ - num_locked__, N__ + n__ - num_locked__);
120 if (ctx__.comm_band().rank() == 0) {
121 print_checksum("subspace_mtrx", cs, RTE_OUT(std::cout));
122 }
123 }
124
125 /* remove any numerical noise */
126 mtrx__.make_real_diag(N__ + n__ - num_locked__);
127
128 /* save new matrix */
129 if (mtrx_old__) {
130 splindex_block_cyclic<> spl_row(N__ + n__ - num_locked__,
131 n_blocks(mtrx__.blacs_grid().num_ranks_row()), block_id(mtrx__.blacs_grid().rank_row()),
132 mtrx__.bs_row());
133 splindex_block_cyclic<> spl_col(N__ + n__ - num_locked__,
134 n_blocks(mtrx__.blacs_grid().num_ranks_col()), block_id(mtrx__.blacs_grid().rank_col()),
135 mtrx__.bs_col());
136
137 if (spl_row.local_size()) {
138 #pragma omp parallel for schedule(static)
139 for (int i = 0; i < spl_col.local_size(); i++) {
140 std::copy(&mtrx__(0, i), &mtrx__(0, i) + spl_row.local_size(), &(*mtrx_old__)(0, i));
141 }
142 }
143 }
144}
145
146}
147
148#endif
Simulation context is a set of parameters and objects describing a single simulation.
auto const & comm_band() const
Band parallelization communicator.
int num_mag_dims() const
Number of dimensions in the magnetization vector.
Distributed matrix.
Definition: dmatrix.hpp:56
int bs_row() const
Row blocking factor.
Definition: dmatrix.hpp:301
int bs_col() const
Column blocking factor.
Definition: dmatrix.hpp:307
void tranc(ftn_int m, ftn_int n, dmatrix< T > &A, ftn_int ia, ftn_int ja, dmatrix< T > &C, ftn_int ic, ftn_int jc) const
Conjugate transpose matrix.
value_type local_size(block_id block_id__) const
Return local size of the split index for a given block.
Definition: splindex.hpp:387
Wave-functions representation.
Describe a range of bands.
Describe a range of spins.
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
Definition: acc.hpp:320
@ scalapack
CPU ScaLAPACK.
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > inner(::spla::Context &spla_ctx__, sddk::memory_t mem__, spin_range spins__, W const &wf_i__, band_range br_i__, Wave_functions< T > const &wf_j__, band_range br_j__, la::dmatrix< F > &result__, int irow0__, int jcol0__)
Compute inner product between the two sets of wave-functions.
Namespace of the SIRIUS library.
Definition: sirius.f90:5
strong_type< int, struct __n_blocks_tag > n_blocks
Number of blocks to which the global index is split.
Definition: splindex.hpp:105
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
Definition: math_tools.hpp:165
void generate_subspace_matrix(Simulation_context &ctx__, int N__, int n__, int num_locked__, wf::Wave_functions< T > &phi__, wf::Wave_functions< T > &op_phi__, la::dmatrix< F > &mtrx__, la::dmatrix< F > *mtrx_old__=nullptr)
Generate subspace matrix for the iterative diagonalization.
Contains definition and implementation of Simulation_context class.
Contains declaration and implementation of Wave_functions class.