SIRIUS 7.5.0
Electronic structure library and applications
multi_cg.hpp
Go to the documentation of this file.
1// Copyright (c) 2018-2022 Harmen Stoppels, Anton Kozhevnikov
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file multi_cg.hpp
21 *
22 * \brief Linear response functionality.
23 */
24
25#ifndef __MULTI_CG_HPP__
26#define __MULTI_CG_HPP__
27
28#include <vector>
29#include <algorithm>
30#include <numeric>
31#include <cmath>
32#include <iostream>
33#include <complex>
38#include "k_point/k_point.hpp"
39
40namespace sirius {
41/// Conjugate-gradient solver.
42namespace cg {
43
44template <class T>
45void
46repack(std::vector<T> &data, std::vector<int> const&ids)
47{
48 for (size_t i = 0; i < ids.size(); ++i) {
49 data[i] = data[ids[i]];
50 }
51}
52
53template<typename Matrix, typename Prec, typename StateVec>
54auto
55multi_cg(Matrix &A, Prec &P, StateVec &X, StateVec &B, StateVec &U, StateVec &C,
56 int maxiters = 10, double tol = 1e-3, bool initial_guess_is_zero = false) {
57
58 PROFILE("sirius::multi_cg");
59
60 auto const n = X.cols();
61
62 U.zero();
63
64 // Use R for residual, we modify the right-hand side B in-place.
65 auto &R = B;
66
67 // Use B effectively as the residual block-vector
68 // R = B - A * X -- don't multiply when initial guess is zero.
69 if (!initial_guess_is_zero) {
70 A.multiply(-1.0, X, 1.0, R, n);
71 }
72
73 auto rhos = std::vector<typename StateVec::value_type>(n);
74 auto rhos_old = rhos;
75 auto sigmas = rhos;
76 auto alphas = rhos;
77
78 // When vectors converge we move them to the front, but we can't really do
79 // that with X, so we have to keep track of where is what.
80 auto ids = std::vector<int>(n);
81 std::iota(ids.begin(), ids.end(), 0);
82
83 size_t num_unconverged = n;
84
85 double niter_eff{0};
86
87 auto residual_history = std::vector<std::vector<typename StateVec::value_type>>(n);
88 int niter{0};
89 for (int iter = 0; iter < maxiters; ++iter) {
90 niter = iter;
91 // Check the residual norms in the P-norm
92 // that means whenever P is approximately inv(A)
93 // since (r, Pr) = (Ae, PAe) ~= (e, Ae)
94 // we check the errors roughly in the A-norm.
95 // When P = I, we just check the residual norm.
96
97 // C = P * R.
98 P.apply(C, R);
99
100 rhos_old = rhos;
101
102 // rhos = dot(C, R)
103 C.block_dot(R, rhos, num_unconverged);
104
105 for (size_t i = 0; i < num_unconverged; ++i) {
106 residual_history[ids[i]].push_back(std::sqrt(std::abs(rhos[i])));
107 }
108
109 auto not_converged = std::vector<int>{};
110 for (size_t i = 0; i < num_unconverged; ++i) {
111 if (std::abs(rhos[i]) > tol * tol) {
112 not_converged.push_back(i);
113 }
114 }
115
116 num_unconverged = not_converged.size();
117
118 niter_eff += static_cast<double>(num_unconverged) / n;
119
120 if (not_converged.empty()) {
121 break;
122 }
123
124 // Move everything contiguously to the front,
125 // except for X, since that's updated in-place.
126 repack(ids, not_converged); // use repack on 1-D vector
127 repack(rhos, not_converged);
128 repack(rhos_old, not_converged);
129
130 U.repack(not_converged); // use repack from the Wave_functions_wrap
131 C.repack(not_converged);
132 R.repack(not_converged);
133
134 A.repack(not_converged); // use repack from the Linear_response_operator
135 P.repack(not_converged); // use repack from the preconditioner
136
137 /* The repack on A and P changes the eigenvalue vectors of A and P respectively */
138 /* The eigenvalues of the Linear_response_operator A are sent to device when needed */
139 /* Update P.eigvals on device here */
140 if (sddk::is_device_memory(P.mem)) {
141 P.eigvals.copy_to(sddk::memory_t::device);
142 }
143
144 // In the first iteration we have U == 0, so no need for an axpy.
145 if (iter == 0) {
146 U.copy(C, num_unconverged);
147 } else {
148 for (size_t i = 0; i < num_unconverged; ++i) {
149 alphas[i] = rhos[i] / rhos_old[i];
150 }
151
152 // U[:, i] = C[:, i] + alpha[i] * U[:, i] for i < num_unconverged
153 U.block_xpby(C, alphas, num_unconverged);
154 }
155
156 // C = A * U.
157 A.multiply(1.0, U, 0.0, C, num_unconverged);
158
159 // compute the optimal distance for the search direction
160 // sigmas = dot(U, C)
161 U.block_dot(C, sigmas, num_unconverged);
162
163 // Update the solution and the residual
164 for (size_t i = 0; i < num_unconverged; ++i) {
165 alphas[i] = rhos[i] / sigmas[i];
166 }
167
168 // X[:, ids[i]] += alpha[i] * U[:, i]
169 X.block_axpy_scatter(alphas, U, ids, num_unconverged);
170
171 for (size_t i = 0; i < num_unconverged; ++i) {
172 alphas[i] *= -1;
173 }
174
175 // R[:, i] += alpha[i] * C[:, i] for i < num_unconverged
176 R.block_axpy(alphas, C, num_unconverged);
177 }
178 struct {
179 std::vector<std::vector<typename StateVec::value_type>> residual_history;
180 int niter; int niter_eff;} result{residual_history, niter, static_cast<int>(niter_eff)};
181 return result;
182}
183}
184
185/// Linear respone functions and objects.
186namespace lr {
187
190 sddk::memory_t mem;
191
192 typedef std::complex<double> value_type;
193
194 void zero()
195 {
196 x->zero(mem);
197 }
198
199 int cols() const
200 {
201 return x->num_wf().get();
202 }
203
204 void block_dot(Wave_functions_wrap const& y__, std::vector<value_type>& rhos__, size_t N__)
205 {
206 rhos__ = wf::inner_diag<double, value_type>(mem, *x, *y__.x, wf::spin_range(0),
207 wf::num_bands(N__));
208 }
209
210 void repack(std::vector<int> const& ids__)
211 {
212 PROFILE("sirius::Wave_functions_wrap::repack");
213 int j{0};
214 for (auto i : ids__) {
215 if (j != i) {
216 wf::copy(mem, *x, wf::spin_index(0), wf::band_range(i, i + 1),
217 *x, wf::spin_index(0), wf::band_range(j, j + 1));
218 }
219 ++j;
220 }
221 }
222
223 void copy(Wave_functions_wrap const &y__, size_t N__)
224 {
225 wf::copy(mem, *y__.x, wf::spin_index(0), wf::band_range(0, N__),
226 *x, wf::spin_index(0), wf::band_range(0, N__));
227 }
228
229 void block_xpby(Wave_functions_wrap const &y__, std::vector<value_type> const &alphas, int N__) {
230 std::vector<value_type> ones(N__, 1.0);
231 wf::axpby(mem, wf::spin_range(0), wf::band_range(0, N__), ones.data(), y__.x, alphas.data(), x);
232 }
233
234 void block_axpy_scatter(std::vector<value_type> const& alphas__, Wave_functions_wrap const &y__,
235 std::vector<int> const &idx__, int n__)
236 {
237 wf::axpy_scatter<double, value_type, int>(mem, wf::spin_range(0), alphas__.data(), y__.x, idx__.data(), x, n__);
238 }
239
240 void block_axpy(std::vector<value_type> const &alphas__, Wave_functions_wrap const &y__, int N__) {
241 std::vector<value_type> ones(N__, 1.0);
242 wf::axpby(mem, wf::spin_range(0), wf::band_range(0, N__), alphas__.data(), y__.x, ones.data(), x);
243 }
244};
245
247 size_t num_active;
248
249 void apply(Wave_functions_wrap &x, Wave_functions_wrap const &y) {
250 x.copy(y, num_active);
251 }
252
253 void repack(std::vector<int> const &ids) {
254 num_active = ids.size();
255 }
256};
257
262 int num_active;
263 sddk::memory_t mem;
265
266 void apply(Wave_functions_wrap &x, Wave_functions_wrap const &y) {
267 // Could avoid a copy here, but apply_precondition is in-place.
268 x.copy(y, num_active);
270 mem,
271 sr,
272 wf::num_bands(num_active),
273 *x.x,
274 H_diag,
275 S_diag,
276 eigvals);
277 }
278
279 void repack(std::vector<int> const &ids) {
280 num_active = ids.size();
281 for (size_t i = 0; i < ids.size(); ++i) {
282 eigvals[i] = eigvals[ids[i]];
283 }
284 }
285};
286
290 std::vector<double> min_eigenvals;
295 double alpha_pv;
298 sddk::memory_t mem;
300
304 std::vector<double> const &eigvals,
309 double alpha_pv,
312 sddk::memory_t mem)
313 : ctx(ctx), Hk(Hk), min_eigenvals(eigvals), Hphi(Hphi), Sphi(Sphi), evq(evq), tmp(tmp),
314 alpha_pv(alpha_pv), br(br), sr(sr), mem(mem), overlap(br.size(), br.size())
315 {
316 // I think we could just compute alpha_pv here by just making it big enough
317 // s.t. the operator H - e * S + alpha_pv * Q is positive, e.g:
318 // alpha_pv = 2 * min_eigenvals.back();
319 // but QE has a very specific way to compute it, so we just forward it from
320 // there.;
321
322 // flip the sign of the eigenvals so that the axpby works
323 for (auto &e : min_eigenvals) {
324 e *= -1;
325 }
326 }
327
328 void repack(std::vector<int> const &ids) {
329 for (size_t i = 0; i < ids.size(); ++i) {
330 min_eigenvals[i] = min_eigenvals[ids[i]];
331 }
332 }
333
334 // y[:, i] <- alpha * A * x[:, i] + beta * y[:, i] where A = (H - e_j S + constant * SQ * SQ')
335 // where SQ is S * eigenvectors.
336 void multiply(double alpha, Wave_functions_wrap x, double beta, Wave_functions_wrap y, int num_active) {
337 PROFILE("sirius::Linear_response_operator::multiply");
338 // Hphi = H * x, Sphi = S * x
339 Hk.apply_h_s<std::complex<double>>(
340 sr,
341 wf::band_range(0, num_active),
342 *x.x,
343 Hphi,
344 Sphi
345 );
346
347 std::vector<double> ones(num_active, 1.0);
348
349 // effectively tmp := (H - e * S) * x, as an axpy, modifying Hphi.
350 wf::axpby(mem, wf::spin_range(0), wf::band_range(0, num_active),
351 min_eigenvals.data(), Sphi, ones.data(), Hphi);
352 wf::copy(mem, *Hphi, wf::spin_index(0), wf::band_range(0, num_active), *tmp,
353 wf::spin_index(0), wf::band_range(0, num_active));
354
355 // Projector, add alpha_pv * (S * (evq * (evq' * (S * x))))
356
357 // overlap := evq' * (S * x)
358 wf::inner(ctx.spla_context(), mem, wf::spin_range(0), *evq, br, *Sphi, wf::band_range(0, num_active),
359 overlap, 0, 0);
360
361 // Hphi := evq * overlap
363 ctx.spla_context(),
364 mem,
365 overlap, 0, 0,
366 1.0, *evq, wf::spin_index(0), br,
367 0.0, *Hphi, wf::spin_index(0), wf::band_range(0, num_active));
368
369 Hk.apply_s<std::complex<double>>(wf::spin_range(0), wf::band_range(0, num_active), *Hphi, *Sphi);
370
371 // tmp := alpha_pv * Sphi + tmp = (H - e * S) * x + alpha_pv * (S * (evq * (evq' * (S * x))))
372 std::vector<double> alpha_pvs(num_active, alpha_pv);
373 wf::axpby(mem, wf::spin_range(0), wf::band_range(0, num_active),
374 alpha_pvs.data(), Sphi, ones.data(), tmp);
375 // y[:, i] <- alpha * tmp + beta * y[:, i]
376 std::vector<double> alphas(num_active, alpha);
377 std::vector<double> betas(num_active, beta);
378 wf::axpby(mem, wf::spin_range(0), wf::band_range(0, num_active),
379 alphas.data(), tmp, betas.data(), y.x);
380 }
381};
382
383}
384
385}
386#endif
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > apply_h_s(wf::spin_range spins__, wf::band_range br__, wf::Wave_functions< T > const &phi__, wf::Wave_functions< T > *hphi__, wf::Wave_functions< T > *sphi__) const
Apply pseudopotential H and S operators to the wavefunctions.
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > apply_s(wf::spin_range spin__, wf::band_range br__, wf::Wave_functions< T > const &phi__, wf::Wave_functions< T > &sphi__) const
apply S operator
Simulation context is a set of parameters and objects describing a single simulation.
Distributed matrix.
Definition: dmatrix.hpp:56
auto num_wf() const
Return number of wave-functions.
void zero(sddk::memory_t mem__, spin_index s__, band_range br__)
Zero a spin component of the wave-functions in a band range.
Describe a range of bands.
Describe a range of spins.
Contains declaration and definition of sirius::Hamiltonian class.
Contains definition of sirius::K_point class.
memory_t
Memory types where the code can store data.
Definition: memory.hpp:71
void axpby(sddk::memory_t mem__, wf::spin_range spins__, wf::band_range br__, F const *alpha__, wf::Wave_functions< T > const *x__, F const *beta__, wf::Wave_functions< T > *y__)
Perform y <- a * x + b * y type of operation.
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > inner(::spla::Context &spla_ctx__, sddk::memory_t mem__, spin_range spins__, W const &wf_i__, band_range br_i__, Wave_functions< T > const &wf_j__, band_range br_j__, la::dmatrix< F > &result__, int irow0__, int jcol0__)
Compute inner product between the two sets of wave-functions.
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > transform(::spla::Context &spla_ctx__, sddk::memory_t mem__, la::dmatrix< F > const &M__, int irow0__, int jcol0__, real_type< F > alpha__, Wave_functions< T > const &wf_in__, spin_index s_in__, band_range br_in__, real_type< F > beta__, Wave_functions< T > &wf_out__, spin_index s_out__, band_range br_out__)
Apply linear transformation to the wave-functions.
void copy(sddk::memory_t mem__, Wave_functions< T > const &in__, wf::spin_index s_in__, wf::band_range br_in__, Wave_functions< F > &out__, wf::spin_index s_out__, wf::band_range br_out__)
Copy wave-functions.
Namespace of the SIRIUS library.
Definition: sirius.f90:5
void apply_preconditioner(sddk::memory_t mem__, wf::spin_range spins__, wf::num_bands num_bands__, wf::Wave_functions< T > &res__, sddk::mdarray< T, 2 > const &h_diag__, sddk::mdarray< T, 2 > const &o_diag__, sddk::mdarray< T, 1 > const &eval__)
Apply preconditioner to the residuals.
Definition: residuals.hpp:166
Contains declaration of sirius::Non_local_operator class.
Compute residuals from the eigen-vectors and basis functions.
Contains declaration and implementation of Wave_functions class.