SIRIUS 7.5.0
Electronic structure library and applications
generate_subspace_matrix.hpp
Go to the documentation of this file.
1// Copyright (c) 2013-2023 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file generate_subspace_matrix.hpp
21 *
22 * \brief Generate subspace-matrix in the auxiliary basis |phi>
23 */
24
25#ifndef __GENERATE_SUBSPACE_MATRIX_HPP__
26#define __GENERATE_SUBSPACE_MATRIX_HPP__
27
27#include "context/simulation_context.hpp"
28#include "core/wf/wave_functions.hpp"
29
30
31namespace sirius {
32
33/// Generate subspace matrix for the iterative diagonalization.
34/** Compute \f$ O_{ii'} = \langle \phi_i | \hat O | \phi_{i'} \rangle \f$ operator matrix
35 * for the subspace spanned by the wave-functions \f$ \phi_i \f$. The matrix is always returned
36 * in host (CPU) memory because most of the standard math libraries operate on CPU data.
37 *
38 * \tparam T Precision type of the wave-functions
39 * \tparam F Type of the output subspace matrix.
40 * \param [in] ctx Simulation context
41 * \param [in] N Number of existing (old) states phi.
42 * \param [in] n Number of new states phi.
43 * \param [in] num_locked Number of locked states phi. Locked states are excluded from the subspace basis.
44 * \param [in] phi Subspace basis functions phi.
45 * \param [in] op_phi Operator (H, S or overlap) applied to phi.
46 * \param [out] mtrx New subspace matrix.
47 * \param [inout] mtrx_old Pointer to old subspace matrix. It is used to store and reuse the subspace matrix
48 * of the previous step.
49 * */
50template <typename T, typename F>
51void generate_subspace_matrix(Simulation_context& ctx__, int N__, int n__, int num_locked__,
53 la::dmatrix<F>* mtrx_old__ = nullptr)
54{
55 PROFILE("sirius::generate_subspace_matrix");
56
57 RTE_ASSERT(n__ != 0);
58 if (mtrx_old__ && mtrx_old__->size()) {
59 RTE_ASSERT(&mtrx__.blacs_grid() == &mtrx_old__->blacs_grid());
60 }
61
62 /* copy old N - num_locked x N - num_locked distributed matrix */
63 if (N__ > 0) {
64 splindex_block_cyclic<> spl_row(N__ - num_locked__,
65 n_blocks(mtrx__.blacs_grid().num_ranks_row()), block_id(mtrx__.blacs_grid().rank_row()),
66 mtrx__.bs_row());
67 splindex_block_cyclic<> spl_col(N__ - num_locked__,
68 n_blocks(mtrx__.blacs_grid().num_ranks_col()), block_id(mtrx__.blacs_grid().rank_col()),
69 mtrx__.bs_col());
70
71 if (mtrx_old__) {
72 if (spl_row.local_size()) {
73 #pragma omp parallel for schedule(static)
74 for (int i = 0; i < spl_col.local_size(); i++) {
75 std::copy(&(*mtrx_old__)(0, i), &(*mtrx_old__)(0, i) + spl_row.local_size(), &mtrx__(0, i));
76 }
77 }
78 }
79
80 if (env::print_checksum()) {
81 auto cs = mtrx__.checksum(N__ - num_locked__, N__ - num_locked__);
82 if (ctx__.comm_band().rank() == 0) {
83 print_checksum("subspace_mtrx_old", cs, RTE_OUT(std::cout));
84 }
85 }
86 }
87
88 /* [--- num_locked -- | ------ N - num_locked ---- | ---- n ----] */
89 /* [ ------------------- N ------------------------| ---- n ----] */
90
91 auto mem = ctx__.processing_unit() == sddk::device_t::CPU ? sddk::memory_t::host : sddk::memory_t::device;
92 /* <{phi,phi_new}|Op|phi_new> */
93 inner(ctx__.spla_context(), mem, ctx__.num_mag_dims() == 3 ? wf::spin_range(0, 2) : wf::spin_range(0), phi__,
94 wf::band_range(num_locked__, N__ + n__), op_phi__, wf::band_range(N__, N__ + n__),
95 mtrx__, 0, N__ - num_locked__);
96
97 /* restore lower part */
98 if (N__ > 0) {
99 if (mtrx__.blacs_grid().comm().size() == 1) {
100 #pragma omp parallel for
101 for (int i = 0; i < N__ - num_locked__; i++) {
102 for (int j = N__ - num_locked__; j < N__ + n__ - num_locked__; j++) {
103 mtrx__(j, i) = conj(mtrx__(i, j));
104 }
105 }
106 } else {
108 .tranc(n__, N__ - num_locked__, mtrx__, 0, N__ - num_locked__, mtrx__, N__ - num_locked__, 0);
109 }
110 }
111
112 if (env::print_checksum()) {
113 splindex_block_cyclic<> spl_row(N__ + n__ - num_locked__,
114 n_blocks(mtrx__.blacs_grid().num_ranks_row()), block_id(mtrx__.blacs_grid().rank_row()),
115 mtrx__.bs_row());
116 splindex_block_cyclic<> spl_col(N__ + n__ - num_locked__,
117 n_blocks(mtrx__.blacs_grid().num_ranks_col()), block_id(mtrx__.blacs_grid().rank_col()),
118 mtrx__.bs_col());
119 auto cs = mtrx__.checksum(N__ + n__ - num_locked__, N__ + n__ - num_locked__);
120 if (ctx__.comm_band().rank() == 0) {
121 print_checksum("subspace_mtrx", cs, RTE_OUT(std::cout));
122 }
123 }
124
125 /* remove any numerical noise */
126 mtrx__.make_real_diag(N__ + n__ - num_locked__);
127
128 /* save new matrix */
129 if (mtrx_old__) {
130 splindex_block_cyclic<> spl_row(N__ + n__ - num_locked__,
131 n_blocks(mtrx__.blacs_grid().num_ranks_row()), block_id(mtrx__.blacs_grid().rank_row()),
132 mtrx__.bs_row());
133 splindex_block_cyclic<> spl_col(N__ + n__ - num_locked__,
134 n_blocks(mtrx__.blacs_grid().num_ranks_col()), block_id(mtrx__.blacs_grid().rank_col()),
135 mtrx__.bs_col());
136
137 if (spl_row.local_size()) {
138 #pragma omp parallel for schedule(static)
139 for (int i = 0; i < spl_col.local_size(); i++) {
140 std::copy(&mtrx__(0, i), &mtrx__(0, i) + spl_row.local_size(), &(*mtrx_old__)(0, i));
141 }
142 }
143 }
144}
145
146}
147
148#endif
Simulation context is a set of parameters and objects describing a single simulation.
auto const & comm_band() const
Band parallelization communicator.
int num_mag_dims() const
Number of dimensions in the magnetization vector.
Distributed matrix.
Definition: dmatrix.hpp:56
int bs_row() const
Row blocking factor.
Definition: dmatrix.hpp:301
int bs_col() const
Column blocking factor.
Definition: dmatrix.hpp:307
void tranc(ftn_int m, ftn_int n, dmatrix< T > &A, ftn_int ia, ftn_int ja, dmatrix< T > &C, ftn_int ic, ftn_int jc) const
Conjugate transpose matrix.
value_type local_size(block_id block_id__) const
Return local size of the split index for a given block.
Definition: splindex.hpp:387
Wave-functions representation.
Describe a range of bands.
Describe a range of spins.
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
Definition: acc.hpp:320
@ scalapack
CPU ScaLAPACK.
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > inner(::spla::Context &spla_ctx__, sddk::memory_t mem__, spin_range spins__, W const &wf_i__, band_range br_i__, Wave_functions< T > const &wf_j__, band_range br_j__, la::dmatrix< F > &result__, int irow0__, int jcol0__)
Compute inner product between the two sets of wave-functions.
Namespace of the SIRIUS library.
Definition: sirius.f90:5
strong_type< int, struct __n_blocks_tag > n_blocks
Number of blocks to which the global index is split.
Definition: splindex.hpp:105
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
Definition: math_tools.hpp:165
void generate_subspace_matrix(Simulation_context &ctx__, int N__, int n__, int num_locked__, wf::Wave_functions< T > &phi__, wf::Wave_functions< T > &op_phi__, la::dmatrix< F > &mtrx__, la::dmatrix< F > *mtrx_old__=nullptr)
Generate subspace matrix for the iterative diagonalization.
Contains definition and implementation of Simulation_context class.
Contains declaration and implementation of Wave_functions class.