SIRIUS 7.5.0
Electronic structure library and applications
davidson.hpp
1// Copyright (c) 2013-2021 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file davidson.hpp
21 *
22 * \brief Davidson iterative solver implementation.
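 *
 * A notational sketch of one outer step of the method implemented below (not code; in the
 * implementation the basis is kept S-orthonormal, so a standard eigen-problem is solved):
 * given a subspace basis \f$ \{ |\phi_{\mu}\rangle \} \f$ of size \f$ N \f$, find the lowest
 * eigenpairs of the projected problem
 * \f[
 *   \sum_{\nu} \langle \phi_{\mu} | \hat H | \phi_{\nu} \rangle Z_{\nu i} =
 *   \varepsilon_i \sum_{\nu} \langle \phi_{\mu} | \hat S | \phi_{\nu} \rangle Z_{\nu i},
 * \f]
 * form the Ritz vectors \f$ |\psi_i\rangle = \sum_{\mu} Z_{\mu i} |\phi_{\mu}\rangle \f$ and the
 * residuals \f$ |r_i\rangle = (\hat H - \varepsilon_i \hat S) |\psi_i\rangle \f$, precondition the
 * unconverged residuals and append them to the basis, and repeat until the band energies (or
 * residual norms) converge. In the overlap mode the same scheme is applied to
 * \f$ \hat S |\psi\rangle = o |\psi\rangle \f$.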
23 */
24
25#ifndef __DAVIDSON_HPP__
26#define __DAVIDSON_HPP__
27
28#include "hamiltonian/hamiltonian.hpp"
29#include "k_point/k_point.hpp"
30#include "core/profiler.hpp"
31#include "residuals.hpp"
32#include "generate_subspace_matrix.hpp"
33
34namespace sirius {
35
36/// Result of Davidson solver.
37struct davidson_result_t {
38 /// Number of iterations.
39 int niter;
40 /// Eigen-values.
41 sddk::mdarray<double, 2> eval;
42 /// True if all bands (up and dn) are converged.
43 bool converged;
44 /// Number of unconverged bands for each spin channel.
45 int num_unconverged[2];
46};
47
48enum class davidson_evp_t {
49 hamiltonian,
50 overlap
51};
52
53template <typename T, typename F>
54inline void
55project_out_subspace(::spla::Context& spla_ctx__, sddk::memory_t mem__, wf::spin_range spins__,
56 wf::Wave_functions<T>& phi__, wf::Wave_functions<T>& sphi__, int N__, int n__, la::dmatrix<F>& o__)
57{
58 PROFILE("sirius::project_out_subspace");
59
60 /* project out the old subspace:
61 * |\tilde phi_new> = |phi_new> - |phi_old><phi_old|S|phi_new> */
62 wf::inner(spla_ctx__, mem__, spins__, sphi__, wf::band_range(0, N__), phi__, wf::band_range(N__, N__ + n__),
63 o__, 0, 0);
64 for (auto s = spins__.begin(); s != spins__.end(); s++) {
65 auto sp = phi__.actual_spin_index(s);
66 wf::transform<T, F>(spla_ctx__, mem__, o__, 0, 0, -1.0, phi__, sp, wf::band_range(0, N__), 1.0,
67 phi__, sp, wf::band_range(N__, N__ + n__));
68 }
69
70 //auto norms = phi__.l2norm(device_t::CPU, spins__, N__ + n__);
71
72 //for (int i = 0; i < N__ + n__; i++) {
73 // std::cout << "phi: " << i << ", l2norm: " << norms[i] << std::endl;
74 //}
75 //inner(spla_ctx__, spins__, sphi__, 0, N__, phi_new__, 0, n__, o__, 0, 0);
76 //for (int i = 0; i < N__; i++) {
77 // for (int j = 0; j < n__; j++) {
78 // std::cout << i << " " << j << " " << o__(i, j) << std::endl;
79 // }
80 //}
81}
82
83//template <typename T>
84//inline int
85//remove_linearly_dependent(::spla::Context& spla_ctx__, sddk::spin_range spins__, sddk::Wave_functions<real_type<T>>& phi__,
86// int N__, int n__, la::dmatrix<T>& o__)
87//
88//{
89// PROFILE("sirius::remove_linearly_dependent");
90//
91// /* compute <phi | phi> */
92// inner(spla_ctx__, spins__, phi__, N__, n__, phi__, N__, n__, o__, 0, 0);
93//
94// auto la = (o__.comm().size() == 1) ? sddk::linalg_t::lapack : sddk::linalg_t::scalapack;
95// sddk::linalg(la).geqrf(n__, n__, o__, 0, 0);
96// auto diag = o__.get_diag(n__);
97//
98// auto eps = std::numeric_limits<real_type<T>>::epsilon();
99//
100// int n{0};
101// for (int i = 0; i < n__; i++) {
102// if (std::abs(diag[i]) >= eps * 10) {
103// /* shift linearly independent basis functions to the beginning of phi */
104// if (n != i) {
105// for (int ispn: spins__) {
106// phi__.copy_from(phi__, 1, ispn, N__ + i, ispn, N__ + n);
107// }
108// }
109// n++;
110// }
111// }
112// return n;
113//}
114
115
116/// Solve the eigen-problem using Davidson iterative method.
117/**
118\tparam T Precision type of wave-functions (float or double).
119\tparam F Type of the subspace matrices (float or double for the Gamma-point case,
120 complex<float> or complex<double> for the general k-point case).
121\tparam what What to solve: H|psi> = e*S|psi> or S|psi> = o|psi>
122\param [in] Hk Hamiltonian for a given k-point.
\param [in] kp K-point at which the eigen-problem is solved.
123\param [in] num_bands Number of eigen-states (bands) to compute.
124\param [in] num_mag_dims Number of magnetic dimensions (0, 1 or 3).
125\param [in,out] psi Wave-functions. On input they are used for the starting guess of the subspace basis.
126 On output they are the solutions of the Hk|psi> = e S|psi> eigen-problem.
127\param [in] tolerance Lambda-function for the band energy tolerance.
128\param [in] res_tol Residual tolerance.
129\param [in] num_steps Number of iterative steps.
130\param [in] locking Lock and do not update the converged wave-functions.
131\param [in] subspace_size Size of the diagonalization subspace.
132\param [in] estimate_eval Estimate eigen-values to get the converged residuals.
133\param [in] extra_ortho Orthogonalize new subspace basis one extra time.
134\param [out] out Output stream.
135\param [in] verbosity Verbosity level.
136\param [in] phi_extra Pointer to the additional (fixed) auxiliary basis functions (used in LAPW).
137\return List of eigen-values.
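
Illustrative call (a sketch only, not taken from the library; the objects hk, kp, psi and the
band count nbnd are hypothetical and assumed to be prepared by the caller):
\code
    // solve H|psi> = e*S|psi> in double precision for a non-magnetic k-point
    auto tol = [](int j__, int ispn__) { return 1e-10; }; // band-energy convergence tolerance
    auto result = davidson<double, std::complex<double>, davidson_evp_t::hamiltonian>(
            hk, kp, wf::num_bands(nbnd), wf::num_mag_dims(0), psi, tol, 1e-6,
            20, true, 4, false, true, std::cout, 0);
    if (!result.converged) {
        // result.num_unconverged[0] bands did not reach the requested tolerance
    }
\endcode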
138*/
139template <typename T, typename F, davidson_evp_t what>
140inline auto
141davidson(Hamiltonian_k<T> const& Hk__, K_point<T>& kp__, wf::num_bands num_bands__, wf::num_mag_dims num_mag_dims__,
142 wf::Wave_functions<T>& psi__, std::function<double(int, int)> tolerance__, double res_tol__,
143 int num_steps__, bool locking__, int subspace_size__, bool estimate_eval__, bool extra_ortho__,
144 std::ostream& out__, int verbosity__, wf::Wave_functions<T>* phi_extra__ = nullptr)
145{
146 PROFILE("sirius::davidson");
147
148 PROFILE_START("sirius::davidson|init");
149 auto& ctx = Hk__.H0().ctx();
150 print_memory_usage(out__, FILE_LINE);
151
152 auto pcs = env::print_checksum();
153
154
155 auto& itso = ctx.cfg().iterative_solver();
156
157 /* true if this is a non-collinear case */
158 const bool nc_mag = (num_mag_dims__.get() == 3);
159
160 auto num_md = wf::num_mag_dims(nc_mag ? 3 : 0);
161
162 /* number of spin components, treated simultaneously
163 * 1 - in case of non-magnetic or collinear calculation
164 * 2 - in case of non-collinear calculation
165 */
166 const int num_sc = nc_mag ? 2 : 1;
167
168 /* number of spinor components stored under the same band index */
169 const int num_spinors = (num_mag_dims__.get() == 1) ? 2 : 1;
170
171 /* number of spins */
172 const int num_spins = (num_mag_dims__.get() == 0) ? 1 : 2;
173
174 /* maximum subspace size */
175 int num_phi = subspace_size__ * num_bands__.get();
176 int num_extra_phi{0};
177 int nlo{0};
178 if (phi_extra__) {
179 num_extra_phi = phi_extra__->num_wf().get();
180 num_phi += num_extra_phi;
181 /* total number of local orbitals (needed for LAPW) */
182 nlo = ctx.unit_cell().mt_lo_basis_size();
183 }
184
185 if (num_phi > kp__.gklo_basis_size()) {
186 std::stringstream s;
187 s << "subspace size is too large!";
188 RTE_THROW(s);
189 }
190
191 /* alias for memory pool */
192 auto& mp = get_memory_pool(ctx.host_memory_t());
193
194 sddk::memory_t mem = ctx.processing_unit_memory_t();
195
196 /* allocate wave-functions */
197
198 using wf_t = wf::Wave_functions<T>;
199
200 bool mt_part{false};
201 //if (ctx.full_potential() && what == davidson_evp_t::hamiltonian) {
202 if (ctx.full_potential()) {
203 mt_part = true;
204 }
205
206 std::vector<wf::device_memory_guard> mg;
207
208 mg.emplace_back(psi__.memory_guard(mem, wf::copy_to::device | wf::copy_to::host));
209
210 /* auxiliary wave-functions */
211 auto phi = wave_function_factory(ctx, kp__, wf::num_bands(num_phi), num_md, mt_part);
212 mg.emplace_back(phi->memory_guard(mem));
213
214 /* Hamiltonian, applied to auxiliary wave-functions */
215 std::unique_ptr<wf_t> hphi{nullptr};
216 if (what == davidson_evp_t::hamiltonian) {
217 hphi = wave_function_factory(ctx, kp__, wf::num_bands(num_phi), num_md, mt_part);
218 mg.emplace_back(hphi->memory_guard(mem));
219 }
220
221 /* S operator, applied to auxiliary wave-functions */
222 auto sphi = wave_function_factory(ctx, kp__, wf::num_bands(num_phi), num_md, mt_part);
223 mg.emplace_back(sphi->memory_guard(mem));
224
225 /* Hamiltonian, applied to new Psi wave-functions */
226 std::unique_ptr<wf_t> hpsi{nullptr};
227 if (what == davidson_evp_t::hamiltonian) {
228 hpsi = wave_function_factory(ctx, kp__, num_bands__, num_md, mt_part);
229 mg.emplace_back(hpsi->memory_guard(mem));
230 }
231
232 /* S operator, applied to new Psi wave-functions */
233 auto spsi = wave_function_factory(ctx, kp__, num_bands__, num_md, mt_part);
234 mg.emplace_back(spsi->memory_guard(mem));
235
236 /* residuals */
237 /* res is also used as a temporary array in orthogonalize(); the first time, num_extra_phi + num_bands
238 * states will be orthogonalized */
239 auto res = wave_function_factory(ctx, kp__, wf::num_bands(num_bands__.get() + num_extra_phi), num_md, mt_part);
240 mg.emplace_back(res->memory_guard(mem));
241
242 std::unique_ptr<wf_t> hphi_extra{nullptr};
243 std::unique_ptr<wf_t> sphi_extra{nullptr};
244
245 if (phi_extra__) {
246 hphi_extra = wave_function_factory(ctx, kp__, wf::num_bands(num_extra_phi), num_md, mt_part);
247 sphi_extra = wave_function_factory(ctx, kp__, wf::num_bands(num_extra_phi), num_md, mt_part);
248 mg.emplace_back(phi_extra__->memory_guard(mem, wf::copy_to::device));
249 mg.emplace_back(hphi_extra->memory_guard(mem));
250 mg.emplace_back(sphi_extra->memory_guard(mem));
251 if (pcs) {
252 auto cs = phi_extra__->checksum(mem, wf::band_range(0, num_extra_phi));
253 print_checksum("phi_extra", cs, RTE_OUT(out__));
254 }
255 }
256
257 int const bs = ctx.cyclic_block_size();
258
259 la::dmatrix<F> H(num_phi, num_phi, ctx.blacs_grid(), bs, bs, mp);
260 la::dmatrix<F> H_old(num_phi, num_phi, ctx.blacs_grid(), bs, bs, mp);
261 la::dmatrix<F> evec(num_phi, num_phi, ctx.blacs_grid(), bs, bs, mp);
262
263 int const num_ortho_steps = extra_ortho__ ? 2 : 1;
264
265 if (is_device_memory(mem)) {
266 auto& mpd = get_memory_pool(mem);
267 if (ctx.blacs_grid().comm().size() == 1) {
268 evec.allocate(mpd);
269 H.allocate(mpd);
270 }
271 }
272
273 print_memory_usage(out__, FILE_LINE);
274
275 /* get diagonal elements for preconditioning */
276 auto h_o_diag = (ctx.full_potential()) ?
277 Hk__.template get_h_o_diag_lapw<3>() : Hk__.template get_h_o_diag_pw<T, 3>();
278
279 sddk::mdarray<T, 2>* h_diag{nullptr};
280 sddk::mdarray<T, 2>* o_diag{nullptr};
281
282 switch (what) {
283 case davidson_evp_t::hamiltonian: {
284 h_diag = &h_o_diag.first;
285 o_diag = &h_o_diag.second;
286 break;
287 }
288 case davidson_evp_t::overlap: {
289 h_diag = &h_o_diag.second;
290 o_diag = &h_o_diag.first;
291 *o_diag = [](){ return 1.0;};
292 if (is_device_memory(mem)) {
293 o_diag->copy_to(mem);
294 }
295 break;
296 }
297 }
298
299 /* checksum info */
300 if (pcs) {
301 auto cs1 = h_o_diag.first.checksum();
302 auto cs2 = h_o_diag.second.checksum();
303 kp__.comm().allreduce(&cs1, 1);
304 kp__.comm().allreduce(&cs2, 1);
305 print_checksum("h_diag", cs1, RTE_OUT(out__));
306 print_checksum("o_diag", cs2, RTE_OUT(out__));
307 auto cs = psi__.checksum(mem, wf::band_range(0, num_bands__.get()));
308 print_checksum("input spinor_wave_functions", cs, RTE_OUT(out__));
309 }
310
311 auto& std_solver = ctx.std_evp_solver();
312
313 davidson_result_t result{0, sddk::mdarray<double, 2>(num_bands__.get(), num_spinors), true, {0, 0}};
314
315 if (verbosity__ >= 1) {
316 RTE_OUT(out__) << "starting Davidson iterative solver" << std::endl
317 << " number of bands : " << num_bands__.get() << std::endl
318 << " subspace size : " << num_phi << std::endl
319 << " locking : " << locking__ << std::endl
320 << " number of spins : " << num_spins << std::endl
321 << " non-collinear : " << nc_mag << std::endl
322 << " number of extra phi : " << num_extra_phi << std::endl;
323 }
324 PROFILE_STOP("sirius::davidson|init");
325
326 PROFILE_START("sirius::davidson|iter");
327 for (int ispin_step = 0; ispin_step < num_spinors; ispin_step++) {
328 if (verbosity__ >= 1) {
329 RTE_OUT(out__) << "ispin_step " << ispin_step << " out of " << num_spinors << std::endl;
330 }
331 auto sr = nc_mag ? wf::spin_range(0, 2) : wf::spin_range(ispin_step);
332
333 print_memory_usage(out__, FILE_LINE);
334
335 /* converged vectors */
336 int num_locked{0};
337
338 sddk::mdarray<real_type<F>, 1> eval(num_bands__.get());
339 sddk::mdarray<real_type<F>, 1> eval_old(num_bands__.get());
340
341 /* lambda function that checks if the band energy is converged */
342 auto is_converged = [&](int j__, int ispn__) -> bool {
343 return std::abs(eval[j__] - eval_old[j__]) <= tolerance__(j__ + num_locked, ispn__);
344 };
345
346 // at the moment we don't pass old eigen-values to iterative solver
347 //if (itso.init_eval_old()) {
348 // eval_old = [&](int64_t j) { return kp.band_energy(j, ispin_step); };
349 //} else {
350 eval_old = []() { return 1e10; };
351 //}
352
353 /* trial basis functions */
354 for (int ispn = 0; ispn < num_sc; ispn++) {
355 wf::copy(mem, psi__, wf::spin_index(nc_mag ? ispn : ispin_step),
356 wf::band_range(0, num_bands__.get()), *phi, wf::spin_index(ispn),
357 wf::band_range(0, num_bands__.get()));
358 }
359
360 /* extra basis functions for LAPW go after phi */
361 if (phi_extra__) {
362 if (num_mag_dims__.get() != 0) {
363 RTE_THROW("not supported");
364 }
365 wf::copy(mem, *phi_extra__, wf::spin_index(0), wf::band_range(0, num_extra_phi),
366 *phi, wf::spin_index(0), wf::band_range(num_bands__.get(), num_bands__.get() + num_extra_phi));
367 }
368
369 if (pcs) {
370 if (phi_extra__) {
371 auto cs = phi_extra__->checksum(mem, wf::band_range(0, num_extra_phi));
372 print_checksum("extra phi", cs, RTE_OUT(out__));
373 }
374 auto cs = phi->checksum(mem, wf::band_range(0, num_bands__.get()));
375 print_checksum("input phi", cs, RTE_OUT(out__));
376 }
377
378 /* current subspace size */
379 int N = num_bands__.get() + num_extra_phi;
380
381 /* first phase: set up and diagonalize the reduced Hamiltonian and get eigen-values;
382 * this is done before the main iterative loop */
383
384 if (verbosity__ >= 1) {
385 RTE_OUT(out__) << "apply Hamiltonian to phi(0:" << N - 1 << ")" << std::endl;
386 }
387
388 /* apply Hamiltonian and S operators to the basis functions */
389 switch (what) {
390 case davidson_evp_t::hamiltonian: {
391 if (ctx.full_potential()) {
392 /* we save time by not applying the APW part to the pure local-orbital basis */
393 /* apply the full LAPW Hamiltonian to the first N - nlo states */
394 Hk__.apply_fv_h_o(false, false, wf::band_range(0, N - nlo), *phi, hphi.get(), sphi.get());
395 /* apply the local-orbital part to the remaining states */
396 Hk__.apply_fv_h_o(false, true, wf::band_range(N - nlo, N), *phi, hphi.get(), sphi.get());
397 /* phi_extra is constant, so hphi_extra and sphi_extra are saved here and reused later */
398 if (phi_extra__) {
399 auto s = wf::spin_index(0);
400 wf::copy(mem, *hphi, s, wf::band_range(num_bands__.get(), num_bands__.get() + num_extra_phi),
401 *hphi_extra, s, wf::band_range(0, num_extra_phi));
402 wf::copy(mem, *sphi, s, wf::band_range(num_bands__.get(), num_bands__.get() + num_extra_phi),
403 *sphi_extra, s, wf::band_range(0, num_extra_phi));
404 }
405 } else {
406 Hk__.template apply_h_s<F>(sr, wf::band_range(0, num_bands__.get()), *phi, hphi.get(), sphi.get());
407 }
408 break;
409 }
410 case davidson_evp_t::overlap: {
411 if (ctx.full_potential()) {
412 Hk__.apply_fv_h_o(true, false, wf::band_range(0, num_bands__.get()), *phi, nullptr, sphi.get());
413 } else {
414 Hk__.template apply_h_s<F>(sr, wf::band_range(0, num_bands__.get()), *phi, nullptr, sphi.get());
415 }
416 break;
417 }
418 }
419
420 /* DEBUG */
421 if (ctx.cfg().control().verification() >= 1) {
422 /* setup eigen-value problem */
423 if (what != davidson_evp_t::overlap) {
424 generate_subspace_matrix(ctx, 0, N, 0, *phi, *hphi, H);
425 auto max_diff = check_hermitian(H, N);
426 if (max_diff > (std::is_same<T, double>::value ? 1e-12 : 1e-6)) {
427 std::stringstream s;
428 s << "H matrix is not Hermitian, max_err = " << max_diff << std::endl
429 << " happened before entering the iterative loop" << std::endl;
430 RTE_WARNING(s);
431 if (N <= 20) {
432 auto s1 = H.serialize("davidson:H_first", N, N);
433 if (kp__.comm().rank() == 0) {
434 RTE_OUT(out__) << s1.str() << std::endl;
435 }
436 }
437 }
438 }
439
440 generate_subspace_matrix(ctx, 0, N, 0, *phi, *sphi, H);
441 auto max_diff = check_hermitian(H, N);
442 if (max_diff > (std::is_same<T, double>::value ? 1e-12 : 1e-6)) {
443 std::stringstream s;
444 s << "O matrix is not Hermitian, max_err = " << max_diff << std::endl
445 << " happened before entering the iterative loop" << std::endl;
446 RTE_WARNING(s);
447 if (N <= 20) {
448 auto s1 = H.serialize("davidson:O_first", N, N);
449 if (kp__.comm().rank() == 0) {
450 RTE_OUT(out__) << s1.str() << std::endl;
451 }
452 }
453 }
454 }
455 /* END DEBUG */
456
457 if (pcs) {
458 auto cs = phi->checksum(mem, wf::band_range(0, N));
459 print_checksum("phi", cs, RTE_OUT(out__));
460 if (hphi) {
461 cs = hphi->checksum(mem, wf::band_range(0, N));
462 print_checksum("hphi", cs, RTE_OUT(out__));
463 }
464 cs = sphi->checksum(mem, wf::band_range(0, N));
465 print_checksum("sphi", cs, RTE_OUT(out__));
466 }
467
468 if (verbosity__ >= 1) {
469 RTE_OUT(out__) << "orthogonalize " << N << " states" << std::endl;
470 }
471
472 /* orthogonalize subspace basis functions and setup eigen-value problem */
473 switch (what) {
474 case davidson_evp_t::hamiltonian: {
475 wf::orthogonalize(ctx.spla_context(), mem,
476 nc_mag ? wf::spin_range(0, 2) : wf::spin_range(0), wf::band_range(0, 0), wf::band_range(0, N),
477 *phi, *sphi, {phi.get(), hphi.get(), sphi.get()}, H, *res, false);
478 /* checksum info */
479 if (pcs) {
480 auto cs = phi->checksum(mem, wf::band_range(0, N));
481 print_checksum("phi", cs, RTE_OUT(out__));
482 if (hphi) {
483 cs = hphi->checksum(mem, wf::band_range(0, N));
484 print_checksum("hphi", cs, RTE_OUT(out__));
485 }
486 cs = sphi->checksum(mem, wf::band_range(0, N));
487 print_checksum("sphi", cs, RTE_OUT(out__));
488 }
489 /* setup eigen-value problem */
490 generate_subspace_matrix(ctx, 0, N, 0, *phi, *hphi, H, &H_old);
491 break;
492 }
493 case davidson_evp_t::overlap: {
494 wf::orthogonalize(ctx.spla_context(), mem,
495 wf::spin_range(nc_mag ? 2 : 0), wf::band_range(0, 0), wf::band_range(0, N), *phi, *phi,
496 {phi.get(), sphi.get()}, H, *res, false);
497 /* setup eigen-value problem */
498 generate_subspace_matrix(ctx, 0, N, 0, *phi, *sphi, H, &H_old);
499 break;
500 }
501 }
502
503 if (verbosity__ >= 1) {
504 RTE_OUT(out__) << "set " << N << " x " << N << " subspace matrix" << std::endl;
505 }
506
507 /* DEBUG */
508 if (ctx.cfg().control().verification() >= 1) {
509 auto max_diff = check_hermitian(H, N);
510 if (max_diff > (std::is_same<real_type<F>, double>::value ? 1e-12 : 1e-6)) {
511 std::stringstream s;
512 s << "H matrix is not Hermitian, max_err = " << max_diff << std::endl
513 << " happened before entering the iterative loop" << std::endl;
514 RTE_WARNING(s);
515 if (N <= 20) {
516 auto s1 = H.serialize("davidson:H", N, N);
517 if (kp__.comm().rank() == 0) {
518 RTE_OUT(out__) << s1.str() << std::endl;
519 }
520 }
521 }
522 }
523
524 /* A smaller block size does not always improve the time to solution much,
525 so keep it at num_bands. */
526 int block_size = num_bands__.get();
527
528 if (verbosity__ >= 1) {
529 RTE_OUT(out__) << "diagonalize " << N << " x " << N << " Hamiltonian" << std::endl;
530 }
531
532 /* solve eigen-value problem with the size N and get lowest num_bands eigen-vectors */
533 if (std_solver.solve(N, num_bands__.get(), H, &eval[0], evec)) {
534 std::stringstream s;
535 s << "error in diagonalization";
536 RTE_THROW(s);
537 }
538
539 ctx.evp_work_count(1);
540
541 if (verbosity__ >= 4) {
542 for (int i = 0; i < num_bands__.get(); i++) {
543 RTE_OUT(out__) << "eval[" << i << "]=" << eval[i] << std::endl;
544 }
545 }
546
547 /* tolerance for the Frobenius norm of the residuals (the norm of the vector of L2-norms),
548 * used for the relative convergence criterion. We can only compute this after
549 * the first residual norms are available */
550 //F relative_frobenius_tolerance{0};
551 F current_frobenius_norm{0};
552
553 /* second phase: start iterative diagonalization */
554 for (int iter_step = 0; iter_step < num_steps__; iter_step++) {
555 if (verbosity__ >= 1) {
556 RTE_OUT(out__) << "iter_step " << iter_step << " out of " << num_steps__ << std::endl;
557 }
558
559 int num_lockable{0};
560
561 bool last_iteration = iter_step == (num_steps__ - 1);
562
563 int num_ritz = num_bands__.get() - num_locked;
564
565 /* don't compute residuals on last iteration */
566 if (!last_iteration) {
567 if (verbosity__ >= 1) {
568 RTE_OUT(out__) << "compute " << num_bands__.get() - num_locked
569 << " residuals from phi(" << num_locked << ":" << N - 1 << ")" << std::endl;
570 }
571 residual_result_t rres;
572 /* get new preconditioned residuals, and also hpsi and spsi as a by-product */
573 switch (what) {
574 case davidson_evp_t::hamiltonian: {
575 rres = residuals<T, F>(ctx, mem, sr, N, num_ritz, num_locked, eval, evec, *hphi, *sphi,
576 *hpsi, *spsi, *res, *h_diag, *o_diag, estimate_eval__, res_tol__, is_converged);
577
578 break;
579 }
580 case davidson_evp_t::overlap: {
581 rres = residuals<T, F>(ctx, mem, sr, N, num_ritz, num_locked, eval, evec, *sphi, *phi, *spsi,
582 psi__, *res, *h_diag, *o_diag, estimate_eval__, res_tol__, is_converged);
583 break;
584 }
585 }
586 result.num_unconverged[ispin_step] = rres.unconverged_residuals;
587 num_lockable = rres.num_consecutive_smallest_converged;
588 current_frobenius_norm = rres.frobenius_norm;
589
590 ///* set the relative tolerance convergence criterion */
591 //if (iter_step == 0) {
592 // relative_frobenius_tolerance = std::abs(current_frobenius_norm) * itso.relative_tolerance();
593 //}
594
595 if (verbosity__ >= 1) {
596 RTE_OUT(out__) << "number of unconverged residuals : " << result.num_unconverged[ispin_step]
597 << std::endl;
598 RTE_OUT(out__) << "current_frobenius_norm : " << current_frobenius_norm << std::endl;
599 }
600 /* checksum info */
601 if (pcs) {
602 auto br = wf::band_range(0, result.num_unconverged[ispin_step]);
603 auto cs_pw = res->checksum_pw(mem, wf::spin_index(0), br);
604 auto cs_mt = res->checksum_mt(mem, wf::spin_index(0), br);
605 auto cs = res->checksum(mem, br);
606 print_checksum("res_pw", cs_pw, RTE_OUT(out__));
607 print_checksum("res_mt", cs_mt, RTE_OUT(out__));
608 print_checksum("res", cs, RTE_OUT(out__));
609 }
610 }
611
612 /* verify convergence criteria */
613 int num_converged = num_ritz - result.num_unconverged[ispin_step];
614
615 bool converged_by_relative_tol{true};
616 /* the case when all bands are converged by energy (and thus expand_with = 0) but
617 * not converged by norm is not properly handled at the moment; this estimation should be done on
618 * the lowest converged bands as an additional step */
619 //if (iter_step > 0) {
620 // converged_by_relative_tol = (std::abs(current_frobenius_norm) < std::abs(relative_frobenius_tolerance));
621 //}
622 /* the exit criterion is easier to satisfy if we allow a non-zero minimum number of unconverged residuals */
623 bool converged_by_absolute_tol = (num_locked + num_converged - itso.min_num_res()) >= num_bands__.get();
624
625 bool converged = converged_by_relative_tol && converged_by_absolute_tol;
626
627 /* TODO: num_unconverged might be very small at some point, slowing down convergence;
628 can we add more? */
629 int expand_with = std::min(result.num_unconverged[ispin_step], block_size);
630 bool should_restart = (N + expand_with > num_phi) ||
631 (num_lockable > 5 && result.num_unconverged[ispin_step] <
632 itso.early_restart() * num_lockable);
633
634 if (verbosity__ >= 3) {
635 RTE_OUT(out__) << "restart=" << (should_restart ? "yes" : "no") << ", locked=" << num_locked
636 << ", converged=" << num_converged << ", wanted=" << num_bands__
637 << ", lockable=" << num_lockable << ", num_ritz=" << num_ritz
638 << ", expansion size=" << expand_with << std::endl;
639 }
640
641 /* check if we ran out of variational space, the eigen-vectors are converged, or it's the last iteration */
642 if (should_restart || converged || last_iteration) {
643 PROFILE("sirius::davidson|update_phi");
644 /* recompute wave-functions */
645 /* \Psi_{i} = \sum_{mu} \phi_{mu} * Z_{mu, i} */
646 for (auto s = sr.begin(); s != sr.end(); s++) {
647 auto sp = phi->actual_spin_index(s);
648 wf::transform(ctx.spla_context(), mem, evec, 0, 0, 1.0, *phi, sp, wf::band_range(num_locked, N),
649 0.0, psi__, s, wf::band_range(num_locked, num_locked + num_ritz));
650 }
651
652 /* update eigen-values */
653 for (int j = num_locked; j < num_bands__.get(); j++) {
654 result.eval(j, ispin_step) = eval[j - num_locked];
655 }
656
657 if (last_iteration && !converged && verbosity__ >= 3) {
658 RTE_OUT(out__) << "Warning: maximum number of iterations reached, but "
659 << result.num_unconverged[ispin_step]
660 << " residual(s) did not converge for k-point "
661 << kp__.vk()[0] << " " << kp__.vk()[1] << " " << kp__.vk()[2] << std::endl;
662 result.converged = false;
663 }
664
665 /* exit the loop if the eigen-vectors are converged or this is the last iteration */
666 if (converged || last_iteration) {
667 if (verbosity__ >= 3) {
668 RTE_OUT(out__) << "end of iterative diagonalization; num_unconverged: "
669 << result.num_unconverged[ispin_step]
670 << ", iteration step: " << iter_step << std::endl;
671 }
672 break;
673 } else { /* otherwise, set Psi as a new trial basis */
674 if (verbosity__ >= 3) {
675 RTE_OUT(out__) << "subspace size limit reached" << std::endl;
676 }
677
678 /* need to compute all hpsi and spsi states (not only unconverged - that was done
679 * by the residuals() function before) */
680 if (estimate_eval__) {
681 for (auto s = sr.begin(); s != sr.end(); s++) {
682 auto sp = sphi->actual_spin_index(s);
683 switch (what) {
684 case davidson_evp_t::hamiltonian: {
685 wf::transform(ctx.spla_context(), mem, evec, 0, 0, 1.0, *hphi, sp,
686 wf::band_range(num_locked, N), 0.0, *hpsi, sp,
687 wf::band_range(0, num_ritz));
688 }
689 /* attention! there is no break statement here for a reason:
690 * we want to continue and compute the update to spsi */
691 case davidson_evp_t::overlap: {
692 wf::transform(ctx.spla_context(), mem, evec, 0, 0, 1.0, *sphi, sp,
693 wf::band_range(num_locked, N), 0.0, *spsi, sp,
694 wf::band_range(0, num_ritz));
695 }
696 }
697 }
698 }
699
700 // TODO: consider keeping more than num_bands when nearly all Ritz vectors have converged.
701 // TODO: remove
702 int keep = num_bands__.get();
703
704 /* update basis functions, hphi and sphi */
705 for (auto s = sr.begin(); s != sr.end(); s++) {
706 auto sp = phi->actual_spin_index(s);
707 auto br = wf::band_range(num_locked, keep);
708 wf::copy(mem, psi__, s, wf::band_range(num_locked, keep), *phi, sp, br);
709 wf::copy(mem, *spsi, sp, wf::band_range(0, num_ritz), *sphi, sp, br);
710 if (what == davidson_evp_t::hamiltonian) {
711 wf::copy(mem, *hpsi, sp, wf::band_range(0, num_ritz), *hphi, sp, br);
712 }
713 }
714
715 /* remove locked Ritz values so indexing starts at unconverged eigenpairs */
716 if (locking__ && num_lockable > 0) {
717 for (int i = num_lockable; i < num_ritz; ++i) {
718 eval[i - num_lockable] = eval[i];
719 }
720 }
721
722 /* remove the locked block from the projected matrix too */
723 H_old.zero();
724 for (int i = 0; i < keep - num_locked; i++) {
725 H_old.set(i, i, eval[i]);
726 }
727
728 /* number of basis functions that we already have */
729 N = keep;
730
731 /* we can lock vectors only when we do orthogonalization */
732 if (locking__) {
733 num_locked += num_lockable;
734 }
735 }
736 }
737
738 /* expand the variational subspace with new basis vectors obtained from residuals */
739 for (auto s = sr.begin(); s != sr.end(); s++) {
740 auto sp = phi->actual_spin_index(s);
741 wf::copy(mem, *res, sp, wf::band_range(0, expand_with), *phi, sp, wf::band_range(N, N + expand_with));
742 }
743 if (should_restart && phi_extra__) {
744 wf::copy(mem, *phi_extra__, wf::spin_index(0), wf::band_range(0, num_extra_phi), *phi,
745 wf::spin_index(0), wf::band_range(N + expand_with, N + expand_with + num_extra_phi));
746 expand_with += num_extra_phi;
747 }
748 if (verbosity__ >= 1) {
749 RTE_OUT(out__) << "expanding subspace of size " << N << " with " << expand_with
750 << " new basis functions" << std::endl;
751 }
752
753 if (verbosity__ >= 1) {
754 RTE_OUT(out__) << "project out " << N << " existing basis states" << std::endl;
755 }
756 /* now that we have added new basis functions to phi, we can project out the old subspace,
757 * then apply the Hamiltonian and overlap operators to the new part, orthogonalize the
758 * new part of phi and, finally, set up the eigen-value problem
759 *
760 * N is the number of previous basis functions
761 * expand_with is the number of new basis functions */
762 switch (what) {
763 case davidson_evp_t::hamiltonian: {
764 if (ctx.full_potential()) {
765 if (should_restart && phi_extra__) {
766 /* apply Hamiltonian to expand_with - num_extra_phi states; copy the rest */
767 Hk__.apply_fv_h_o(false, false, wf::band_range(N, N + expand_with - num_extra_phi),
768 *phi, hphi.get(), sphi.get());
769 wf::copy(mem, *hphi_extra, wf::spin_index(0), wf::band_range(0, num_extra_phi), *hphi,
770 wf::spin_index(0), wf::band_range(N + expand_with - num_extra_phi, N + expand_with));
771 wf::copy(mem, *sphi_extra, wf::spin_index(0), wf::band_range(0, num_extra_phi), *sphi,
772 wf::spin_index(0), wf::band_range(N + expand_with - num_extra_phi, N + expand_with));
773 } else {
774 Hk__.apply_fv_h_o(false, false, wf::band_range(N, N + expand_with), *phi, hphi.get(), sphi.get());
775 }
776 wf::orthogonalize(ctx.spla_context(), mem, sr, wf::band_range(0, N), wf::band_range(N, N + expand_with),
777 *phi, *sphi, {phi.get(), hphi.get(), sphi.get()}, H, *res, true);
778 } else {
779 /* for the pseudopotential case we first project out the old subspace; this takes slightly
780 * fewer operations and gives a slightly more stable procedure, especially for fp32 */
781 project_out_subspace<T, F>(ctx.spla_context(), mem, sr, *phi, *sphi, N, expand_with, H);
782 Hk__.template apply_h_s<F>(sr, wf::band_range(N, N + expand_with), *phi, hphi.get(), sphi.get());
783 for (int j = 0; j < num_ortho_steps; j++) {
784 wf::orthogonalize(ctx.spla_context(), mem, sr, wf::band_range(0, N), wf::band_range(N, N + expand_with),
785 *phi, *sphi, {phi.get(), hphi.get(), sphi.get()}, H, *res, false);
786 }
787 }
788 generate_subspace_matrix<T, F>(ctx, N, expand_with, num_locked, *phi, *hphi, H, &H_old);
789 break;
790 }
791 case davidson_evp_t::overlap: {
792 project_out_subspace(ctx.spla_context(), mem, sr, *phi, *phi, N, expand_with, H);
793 if (ctx.full_potential()) {
794 Hk__.apply_fv_h_o(true, false, wf::band_range(N, N + expand_with), *phi, nullptr, sphi.get());
795 } else {
796 Hk__.template apply_h_s<F>(sr, wf::band_range(N, N + expand_with), *phi, nullptr, sphi.get());
797 }
798 for (int j = 0; j < num_ortho_steps; j++) {
799 wf::orthogonalize(ctx.spla_context(), mem, sr, wf::band_range(0, N), wf::band_range(N, N + expand_with),
800 *phi, *phi, {phi.get(), sphi.get()}, H, *res, false);
801 }
802 generate_subspace_matrix<T, F>(ctx, N, expand_with, num_locked, *phi, *sphi, H, &H_old);
803 break;
804 }
805 }
806
807 if (verbosity__ >= 3) {
808 RTE_OUT(out__) << "orthogonalized " << expand_with << " to " << N << std::endl;
809 }
810
811 if (ctx.cfg().control().verification() >= 1) {
812 auto max_diff = check_hermitian(H, N + expand_with - num_locked);
813 if (max_diff > (std::is_same<T, double>::value ? 1e-12 : 1e-6)) {
814 std::stringstream s;
815 if (verbosity__ >= 1) {
816 RTE_OUT(out__) << "H matrix of size " << N + expand_with - num_locked
817 << " is not Hermitian, maximum error: " << max_diff << std::endl;
818 }
819 }
820 }
821
822 /* increase size of the variation space */
823 N += expand_with;
824
825 /* copy the Ritz values */
826 sddk::copy(eval, eval_old);
827
828 if (verbosity__ >= 3) {
829 RTE_OUT(out__) << "computing " << num_bands__.get() << " pre-Ritz pairs" << std::endl;
830 }
831 /* solve standard eigen-value problem with the size N */
832 if (std_solver.solve(N - num_locked, num_bands__.get() - num_locked, H, &eval[0], evec)) {
833 std::stringstream s;
834 s << "error in diagonalization";
835 RTE_THROW(s);
836 }
837
838 ctx.evp_work_count(std::pow(static_cast<double>(N - num_locked) / num_bands__.get(), 3));
839
840 if (verbosity__ >= 3) {
841 RTE_OUT(out__) << "step: " << iter_step << ", current subspace size: " << N
842 << ", maximum subspace size: " << num_phi << std::endl;
843 }
844 if (verbosity__ >= 4) {
845 for (int i = 0; i < num_bands__.get() - num_locked; i++) {
846 RTE_OUT(out__) << "eval[" << i << "]=" << eval[i]
847 << ", diff=" << std::abs(eval[i] - eval_old[i]) << std::endl;
848 }
849 }
850 result.niter++;
851
852 } /* loop over iterative steps k */
853 print_memory_usage(out__, FILE_LINE);
854 } /* loop over ispin_step */
855 PROFILE_STOP("sirius::davidson|iter");
856
857 mg.clear();
858 print_memory_usage(out__, FILE_LINE);
859 return result;
860}
861
862} // namespace sirius
863
864#endif