SIRIUS 7.5.0
Electronic structure library and applications
diagonalize_fp.hpp
// Copyright (c) 2013-2023 Anton Kozhevnikov, Thomas Schulthess
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
// the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
// following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
// and the following disclaimer in the documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

/** \file diagonalize_fp.hpp
 *
 * \brief Diagonalize full-potential LAPW Hamiltonian.
 */

#ifndef __DIAGONALIZE_FP_HPP__
#define __DIAGONALIZE_FP_HPP__

#include "davidson.hpp"
#include "k_point/k_point.hpp"

namespace sirius {

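/// Not implemented for single precision.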
inline void
diagonalize_fp_fv_exact(Hamiltonian_k<float> const&, K_point<float>&)
{
    RTE_THROW("not implemented");
}

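/// Diagonalize the first-variational LAPW Hamiltonian with an exact (direct) eigen-solver.
/** Set up the Hamiltonian and overlap matrices in the full G+k plus local-orbital basis and solve the
 *  generalized eigen-value problem H C = E O C for the lowest num_fv_states eigen-pairs. */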
inline void
diagonalize_fp_fv_exact(Hamiltonian_k<double> const& Hk__, K_point<double>& kp__)
{
    PROFILE("sirius::diagonalize_fp_fv_exact");

    auto& ctx = Hk__.H0().ctx();

    auto& solver = ctx.gen_evp_solver();

    /* total eigen-value problem size */
    int ngklo = kp__.gklo_basis_size();

    /* block size of scalapack 2d block-cyclic distribution */
    int bs = ctx.cyclic_block_size();

    auto pcs = env::print_checksum();

    la::dmatrix<std::complex<double>> h(ngklo, ngklo, ctx.blacs_grid(), bs, bs, get_memory_pool(solver.host_memory_t()));
    la::dmatrix<std::complex<double>> o(ngklo, ngklo, ctx.blacs_grid(), bs, bs, get_memory_pool(solver.host_memory_t()));

    /* setup Hamiltonian and overlap */
    Hk__.set_fv_h_o(h, o);

    if (ctx.gen_evp_solver().type() == la::ev_solver_t::cusolver) {
        auto& mpd = get_memory_pool(sddk::memory_t::device);
        h.allocate(mpd);
        o.allocate(mpd);
        kp__.fv_eigen_vectors().allocate(mpd);
    }

    if (ctx.cfg().control().verification() >= 1) {
        double max_diff = check_hermitian(h, ngklo);
        if (max_diff > 1e-12) {
            std::stringstream s;
            s << "H matrix is not hermitian" << std::endl
              << "max error: " << max_diff;
            RTE_THROW(s);
        }
        max_diff = check_hermitian(o, ngklo);
        if (max_diff > 1e-12) {
            std::stringstream s;
            s << "O matrix is not hermitian" << std::endl
              << "max error: " << max_diff;
            RTE_THROW(s);
        }
    }

    if (pcs) {
        auto z1 = h.checksum(ngklo, ngklo);
        auto z2 = o.checksum(ngklo, ngklo);
        print_checksum("h_lapw", z1, ctx.out());
        print_checksum("o_lapw", z2, ctx.out());
    }

    RTE_ASSERT(kp__.gklo_basis_size() > ctx.num_fv_states());

    std::vector<double> eval(ctx.num_fv_states());

    print_memory_usage(ctx.out(), FILE_LINE);
    if (solver.solve(kp__.gklo_basis_size(), ctx.num_fv_states(), h, o, eval.data(), kp__.fv_eigen_vectors())) {
        RTE_THROW("error in generalized eigen-value problem");
    }
    print_memory_usage(ctx.out(), FILE_LINE);

    if (ctx.gen_evp_solver().type() == la::ev_solver_t::cusolver) {
        h.deallocate(sddk::memory_t::device);
        o.deallocate(sddk::memory_t::device);
        kp__.fv_eigen_vectors().deallocate(sddk::memory_t::device);
    }
    kp__.set_fv_eigen_values(&eval[0]);

    {
        rte::ostream out(kp__.out(4), std::string(__func__));
        for (int i = 0; i < ctx.num_fv_states(); i++) {
            out << "eval[" << i << "]=" << eval[i] << std::endl;
        }
    }

    if (pcs) {
        auto z1 = kp__.fv_eigen_vectors().checksum(kp__.gklo_basis_size(), ctx.num_fv_states());
        print_checksum("fv_eigen_vectors", z1, kp__.out(1));
    }

    /* remap eigen-vectors from the 2D block-cyclic (ScaLAPACK) distribution to the slab distribution of wave-functions */
    {
        /* G+k vector part */
        auto layout_in  = kp__.fv_eigen_vectors().grid_layout(0, 0, kp__.gkvec().num_gvec(), ctx.num_fv_states());
        auto layout_out = kp__.fv_eigen_vectors_slab().grid_layout_pw(wf::spin_index(0), wf::band_range(0, ctx.num_fv_states()));
        costa::transform(layout_in, layout_out, 'N', la::constant<std::complex<double>>::one(),
                         la::constant<std::complex<double>>::zero(), kp__.comm().native());
    }
    {
        /* muffin-tin part */
        auto layout_in  = kp__.fv_eigen_vectors().grid_layout(kp__.gkvec().num_gvec(), 0,
                                                              ctx.unit_cell().mt_lo_basis_size(), ctx.num_fv_states());
        auto layout_out = kp__.fv_eigen_vectors_slab().grid_layout_mt(wf::spin_index(0), wf::band_range(0, ctx.num_fv_states()));
        costa::transform(layout_in, layout_out, 'N', la::constant<std::complex<double>>::one(),
                         la::constant<std::complex<double>>::zero(), kp__.comm().native());
    }

    if (pcs) {
        auto z1 = kp__.fv_eigen_vectors_slab().checksum_pw(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, ctx.num_fv_states()));
        auto z2 = kp__.fv_eigen_vectors_slab().checksum_mt(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, ctx.num_fv_states()));
        print_checksum("fv_eigen_vectors_slab", z1 + z2, kp__.out(1));
    }

    /* renormalize wave-functions */
    if (ctx.valence_relativity() == relativity_t::iora) {

        std::vector<int> num_mt_coeffs(ctx.unit_cell().num_atoms());
        for (int ia = 0; ia < ctx.unit_cell().num_atoms(); ia++) {
            num_mt_coeffs[ia] = ctx.unit_cell().atom(ia).mt_lo_basis_size();
        }
        wf::Wave_functions<double> ofv_new(kp__.gkvec_sptr(), num_mt_coeffs, wf::num_mag_dims(0),
                                           wf::num_bands(ctx.num_fv_states()), sddk::memory_t::host);

        {
            auto mem = ctx.processing_unit() == sddk::device_t::CPU ? sddk::memory_t::host : sddk::memory_t::device;
            auto mg1 = kp__.fv_eigen_vectors_slab().memory_guard(mem, wf::copy_to::device);
            auto mg2 = ofv_new.memory_guard(mem, wf::copy_to::host);

            Hk__.apply_fv_h_o(false, false, wf::band_range(0, ctx.num_fv_states()), kp__.fv_eigen_vectors_slab(),
                              nullptr, &ofv_new);
        }

        auto norm1 = wf::inner_diag<double, std::complex<double>>(sddk::memory_t::host, kp__.fv_eigen_vectors_slab(),
                                                                  ofv_new, wf::spin_range(0), wf::num_bands(ctx.num_fv_states()));

        std::vector<double> norm;
        for (auto e : norm1) {
            norm.push_back(1 / std::sqrt(std::real(e)));
        }

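        /* scale each state in place: |psi_i> -> |psi_i> / sqrt(<psi_i|O|psi_i>) */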
        wf::axpby<double, double>(sddk::memory_t::host, wf::spin_range(0), wf::band_range(0, ctx.num_fv_states()),
                                  nullptr, nullptr, norm.data(), &kp__.fv_eigen_vectors_slab());
    }

    //if (ctx.cfg().control().verification() >= 2) {
    //    kp.message(1, __function_name__, "%s", "checking application of H and O\n");
    //    /* check application of H and O */
    //    sddk::Wave_functions<double> hphi(kp.gkvec_partition(), unit_cell_.num_atoms(),
    //            [this](int ia) { return unit_cell_.atom(ia).mt_lo_basis_size(); }, ctx.num_fv_states(),
    //            ctx.preferred_memory_t());
    //    sddk::Wave_functions<double> ophi(kp.gkvec_partition(), unit_cell_.num_atoms(),
    //            [this](int ia) { return unit_cell_.atom(ia).mt_lo_basis_size(); }, ctx.num_fv_states(),
    //            ctx.preferred_memory_t());

    //    if (ctx.processing_unit() == sddk::device_t::GPU) {
    //        kp.fv_eigen_vectors_slab().allocate(sddk::spin_range(0), sddk::memory_t::device);
    //        kp.fv_eigen_vectors_slab().copy_to(sddk::spin_range(0), sddk::memory_t::device, 0, ctx.num_fv_states());
    //        hphi.allocate(sddk::spin_range(0), sddk::memory_t::device);
    //        ophi.allocate(sddk::spin_range(0), sddk::memory_t::device);
    //    }

    //    Hk__.apply_fv_h_o(false, false, 0, ctx.num_fv_states(), kp.fv_eigen_vectors_slab(), &hphi, &ophi);

    //    la::dmatrix<std::complex<double>> hmlt(ctx.num_fv_states(), ctx.num_fv_states(), ctx.blacs_grid(),
    //                                           ctx.cyclic_block_size(), ctx.cyclic_block_size());
    //    la::dmatrix<std::complex<double>> ovlp(ctx.num_fv_states(), ctx.num_fv_states(), ctx.blacs_grid(),
    //                                           ctx.cyclic_block_size(), ctx.cyclic_block_size());

    //    inner(ctx.spla_context(), sddk::spin_range(0), kp.fv_eigen_vectors_slab(), 0, ctx.num_fv_states(),
    //          hphi, 0, ctx.num_fv_states(), hmlt, 0, 0);
    //    inner(ctx.spla_context(), sddk::spin_range(0), kp.fv_eigen_vectors_slab(), 0, ctx.num_fv_states(),
    //          ophi, 0, ctx.num_fv_states(), ovlp, 0, 0);

    //    double max_diff{0};
    //    for (int i = 0; i < hmlt.num_cols_local(); i++) {
    //        int icol = hmlt.icol(i);
    //        for (int j = 0; j < hmlt.num_rows_local(); j++) {
    //            int jrow = hmlt.irow(j);
    //            if (icol == jrow) {
    //                max_diff = std::max(max_diff, std::abs(hmlt(j, i) - eval[icol]));
    //            } else {
    //                max_diff = std::max(max_diff, std::abs(hmlt(j, i)));
    //            }
    //        }
    //    }
    //    if (max_diff > 1e-9) {
    //        std::stringstream s;
    //        s << "application of Hamiltonian failed, maximum error: " << max_diff;
    //        WARNING(s);
    //    }

    //    max_diff = 0;
    //    for (int i = 0; i < ovlp.num_cols_local(); i++) {
    //        int icol = ovlp.icol(i);
    //        for (int j = 0; j < ovlp.num_rows_local(); j++) {
    //            int jrow = ovlp.irow(j);
    //            if (icol == jrow) {
    //                max_diff = std::max(max_diff, std::abs(ovlp(j, i) - 1.0));
    //            } else {
    //                max_diff = std::max(max_diff, std::abs(ovlp(j, i)));
    //            }
    //        }
    //    }
    //    if (max_diff > 1e-9) {
    //        std::stringstream s;
    //        s << "application of overlap failed, maximum error: " << max_diff;
    //        WARNING(s);
    //    }
    //}
}

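/// Compute the lowest eigen-pairs of the APW-APW overlap matrix with the iterative Davidson solver.
/** The resulting singular components are later used as an auxiliary basis for the first-variational problem. */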
inline void
get_singular_components(Hamiltonian_k<double> const& Hk__, K_point<double>& kp__, double itsol_tol__)
{
    PROFILE("sirius::get_singular_components");

    auto& ctx = Hk__.H0().ctx();

    int ncomp = kp__.singular_components().num_wf().get();

    RTE_OUT(ctx.out(3)) << "number of singular components: " << ncomp << std::endl;

    auto& itso = ctx.cfg().iterative_solver();

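    /* solver output goes to stdout on rank 0 and to a scratch stream on all other ranks */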
    std::stringstream s;
    std::ostream* out = (kp__.comm().rank() == 0) ? &std::cout : &s;

    auto result = davidson<double, std::complex<double>, davidson_evp_t::overlap>(Hk__, kp__,
            wf::num_bands(ncomp), wf::num_mag_dims(0), kp__.singular_components(),
            [&](int i, int ispn) { return itsol_tol__; }, itso.residual_tolerance(), itso.num_steps(),
            itso.locking(), itso.subspace_size(), itso.converge_by_energy(), itso.extra_ortho(), *out,
            ctx.verbosity() - 2);

    RTE_OUT(kp__.out(2)) << "smallest eigen-value of the singular components: " << result.eval[0] << std::endl;
    for (int i = 0; i < ncomp; i++) {
        RTE_OUT(kp__.out(3)) << "singular component eigen-value[" << i << "]=" << result.eval[i] << std::endl;
    }
}

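/// Not implemented for single precision.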
inline void
diagonalize_fp_fv_davidson(Hamiltonian_k<float> const&, K_point<float>&, double)
{
    RTE_THROW("not implemented");
}

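/// Diagonalize the first-variational LAPW Hamiltonian with the iterative Davidson solver.
/** Singular components of the APW-APW overlap matrix and pure local orbitals are passed to the solver as an
 *  auxiliary basis. */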
inline void
diagonalize_fp_fv_davidson(Hamiltonian_k<double> const& Hk__, K_point<double>& kp__, double itsol_tol__)
{
    PROFILE("sirius::diagonalize_fp_fv_davidson");

    auto& ctx = Hk__.H0().ctx();

    auto& itso = ctx.cfg().iterative_solver();

    /* number of singular components */
    int ncomp = kp__.singular_components().num_wf().get();

    if (ncomp) {
        /* compute eigen-vectors of O^{APW-APW} */
        get_singular_components(Hk__, kp__, itsol_tol__);
    }

    /* total number of local orbitals */
    int nlo = ctx.unit_cell().mt_lo_basis_size();

    auto phi_extra_new = wave_function_factory(ctx, kp__, wf::num_bands(nlo + ncomp), wf::num_mag_dims(0), true);
    phi_extra_new->zero(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, nlo + ncomp));

    if (ncomp) {
        /* copy [0, ncomp) from kp.singular_components() to [0, ncomp) in phi_extra */
        wf::copy(sddk::memory_t::host, kp__.singular_components(), wf::spin_index(0), wf::band_range(0, ncomp),
                 *phi_extra_new, wf::spin_index(0), wf::band_range(0, ncomp));
    }

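    /* offset of the first local orbital of each atom in the full local-orbital basis */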
    std::vector<int> offset_lo(ctx.unit_cell().num_atoms());
    std::generate(offset_lo.begin(), offset_lo.end(),
            [n = 0, ia = 0, &ctx] () mutable
            {
                int offs = n;
                n += ctx.unit_cell().atom(ia++).mt_lo_basis_size();
                return offs;
            });

    /* add pure local orbitals to the basis starting from the ncomp index */
    if (nlo) {
        for (auto it : phi_extra_new->spl_num_atoms()) {
            for (int xi = 0; xi < ctx.unit_cell().atom(it.i).mt_lo_basis_size(); xi++) {
                phi_extra_new->mt_coeffs(xi, it.li, wf::spin_index(0),
                        wf::band_index(offset_lo[it.i] + xi + ncomp)) = 1.0;
            }
        }
    }
    if (env::print_checksum()) {
        auto cs = phi_extra_new->checksum(sddk::memory_t::host, wf::band_range(0, nlo + ncomp));
        if (kp__.comm().rank() == 0) {
            print_checksum("phi_extra", cs, RTE_OUT(ctx.out()));
        }
    }

    auto tolerance = [&](int j__, int ispn__) -> double {
        return itsol_tol__;
    };

    std::stringstream s;
    std::ostream* out = (kp__.comm().rank() == 0) ? &std::cout : &s;

    auto result = davidson<double, std::complex<double>, davidson_evp_t::hamiltonian>(Hk__, kp__,
            wf::num_bands(ctx.num_fv_states()), wf::num_mag_dims(0), kp__.fv_eigen_vectors_slab(), tolerance,
            itso.residual_tolerance(), itso.num_steps(), itso.locking(), itso.subspace_size(),
            itso.converge_by_energy(), itso.extra_ortho(), *out, ctx.verbosity() - 2,
            phi_extra_new.get());

    kp__.set_fv_eigen_values(&result.eval[0]);
}

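/// Not implemented for single precision.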
inline void
diagonalize_fp_sv(Hamiltonian_k<float> const&, K_point<float>&)
{
    RTE_THROW("not implemented");
}

/// Diagonalize second-variational Hamiltonian.
inline void
diagonalize_fp_sv(Hamiltonian_k<double> const& Hk__, K_point<double>& kp)
{
    PROFILE("sirius::diagonalize_fp_sv");

    // auto& kp = Hk__.kp();
    auto& ctx = Hk__.H0().ctx();

    if (!ctx.need_sv()) {
        kp.bypass_sv();
        return;
    }

    auto pcs = env::print_checksum();

    int nfv = ctx.num_fv_states();
    int bs  = ctx.cyclic_block_size();

    sddk::mdarray<double, 2> band_energies(ctx.num_bands(), ctx.num_spinors());

    std::vector<int> num_mt_coeffs(ctx.unit_cell().num_atoms());
    for (int ia = 0; ia < ctx.unit_cell().num_atoms(); ia++) {
        num_mt_coeffs[ia] = ctx.unit_cell().atom(ia).mt_basis_size();
    }

    /* product of the second-variational Hamiltonian and a first-variational wave-function */
    std::vector<wf::Wave_functions<double>> hpsi;
    for (int i = 0; i < ctx.num_mag_comp(); i++) {
        hpsi.push_back(wf::Wave_functions<double>(kp.gkvec_sptr(), num_mt_coeffs,
                wf::num_mag_dims(0), wf::num_bands(nfv), ctx.host_memory_t()));
    }

    if (pcs) {
        auto cs1 = kp.fv_states().checksum_pw(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, nfv));
        auto cs2 = kp.fv_states().checksum_mt(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, nfv));
        if (kp.comm().rank() == 0) {
            print_checksum("psi_pw", cs1, RTE_OUT(ctx.out()));
            print_checksum("psi_mt", cs2, RTE_OUT(ctx.out()));
        }
    }

    /* compute product of magnetic field and wave-function */
    if (ctx.num_spins() == 2) {
        Hk__.apply_b(kp.fv_states(), hpsi);
    } else {
        hpsi[0].zero(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, nfv));
    }

    print_memory_usage(ctx.out(), FILE_LINE);

    //== if (ctx.uj_correction())
    //== {
    //==     apply_uj_correction<uu>(kp->fv_states_col(), hpsi);
    //==     if (ctx.num_mag_dims() != 0) apply_uj_correction<dd>(kp->fv_states_col(), hpsi);
    //==     if (ctx.num_mag_dims() == 3)
    //==     {
    //==         apply_uj_correction<ud>(kp->fv_states_col(), hpsi);
    //==         if (ctx.std_evp_solver()->parallel()) apply_uj_correction<du>(kp->fv_states_col(), hpsi);
    //==     }
    //== }

    if (ctx.so_correction()) {
        Hk__.H0().apply_so_correction(kp.fv_states(), hpsi);
    }

    std::vector<wf::device_memory_guard> mg;
    mg.emplace_back(kp.fv_states().memory_guard(ctx.processing_unit_memory_t(), wf::copy_to::device));
    for (int i = 0; i < ctx.num_mag_comp(); i++) {
        mg.emplace_back(hpsi[i].memory_guard(ctx.processing_unit_memory_t(), wf::copy_to::device));
    }

    print_memory_usage(ctx.out(), FILE_LINE);

    auto& std_solver = ctx.std_evp_solver();

    wf::band_range br(0, nfv);
    wf::spin_range sr(0, 1);

    auto mem = ctx.processing_unit_memory_t();

    if (ctx.num_mag_dims() != 3) {
        la::dmatrix<std::complex<double>> h(nfv, nfv, ctx.blacs_grid(), bs, bs);
        if (ctx.blacs_grid().comm().size() == 1 && ctx.processing_unit() == sddk::device_t::GPU) {
            h.allocate(get_memory_pool(sddk::memory_t::device));
        }
        /* perform one or two consecutive diagonalizations */
        for (int ispn = 0; ispn < ctx.num_spins(); ispn++) {
            if (pcs) {
                auto cs1 = hpsi[ispn].checksum_pw(mem, wf::spin_index(0), wf::band_range(0, nfv));
                auto cs2 = hpsi[ispn].checksum_mt(mem, wf::spin_index(0), wf::band_range(0, nfv));
                if (kp.comm().rank() == 0) {
                    std::stringstream s1;
                    s1 << "hpsi_pw_" << ispn;
                    print_checksum(s1.str(), cs1, RTE_OUT(ctx.out()));
                    std::stringstream s2;
                    s2 << "hpsi_mt_" << ispn;
                    print_checksum(s2.str(), cs2, RTE_OUT(ctx.out()));
                }
            }
            /* compute <wf_i | h * wf_j> */
            wf::inner(ctx.spla_context(), mem, sr, kp.fv_states(), br, hpsi[ispn], br, h, 0, 0);

            for (int i = 0; i < nfv; i++) {
                h.add(i, i, kp.fv_eigen_value(i));
            }
            PROFILE("sirius::diagonalize_fp_sv|stdevp");
            std_solver.solve(nfv, nfv, h, &band_energies(0, ispn), kp.sv_eigen_vectors(ispn));
        }
    } else {
        int nb = ctx.num_bands();
        la::dmatrix<std::complex<double>> h(nb, nb, ctx.blacs_grid(), bs, bs);
        if (ctx.blacs_grid().comm().size() == 1 && ctx.processing_unit() == sddk::device_t::GPU) {
            h.allocate(get_memory_pool(sddk::memory_t::device));
        }
        /* compute <wf_i | h * wf_j> for up-up block */
        wf::inner(ctx.spla_context(), mem, sr, kp.fv_states(), br, hpsi[0], br, h, 0, 0);
        /* compute <wf_i | h * wf_j> for dn-dn block */
        wf::inner(ctx.spla_context(), mem, sr, kp.fv_states(), br, hpsi[1], br, h, nfv, nfv);
        /* compute <wf_i | h * wf_j> for up-dn block */
        wf::inner(ctx.spla_context(), mem, sr, kp.fv_states(), br, hpsi[2], br, h, 0, nfv);

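        /* fill the dn-up block with the conjugate transpose of the up-dn block */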
        if (kp.comm().size() == 1) {
            for (int i = 0; i < nfv; i++) {
                for (int j = 0; j < nfv; j++) {
                    h(nfv + j, i) = std::conj(h(i, nfv + j));
                }
            }
        } else {
            la::wrap(la::lib_t::scalapack).tranc(nfv, nfv, h, 0, nfv, h, nfv, 0);
        }

        for (int i = 0; i < nfv; i++) {
            h.add(i, i, kp.fv_eigen_value(i));
            h.add(i + nfv, i + nfv, kp.fv_eigen_value(i));
        }
        PROFILE("sirius::diagonalize_fp_sv|stdevp");
        std_solver.solve(nb, nb, h, &band_energies(0, 0), kp.sv_eigen_vectors(0));
    }

    for (int ispn = 0; ispn < ctx.num_spinors(); ispn++) {
        for (int j = 0; j < ctx.num_bands(); j++) {
            kp.band_energy(j, ispn, band_energies(j, ispn));
        }
    }
}

/// Diagonalize a full-potential LAPW Hamiltonian.
template <typename T>
inline void
diagonalize_fp(Hamiltonian_k<T> const& Hk__, K_point<T>& kp__, double itsol_tol__)
{
    auto& ctx = Hk__.H0().ctx();
    print_memory_usage(ctx.out(), FILE_LINE);
    if (ctx.cfg().control().use_second_variation()) {
        /* solve non-magnetic Hamiltonian (so-called first variation) */
        auto& itso = ctx.cfg().iterative_solver();
        if (itso.type() == "exact") {
            diagonalize_fp_fv_exact(Hk__, kp__);
        } else if (itso.type() == "davidson") {
            diagonalize_fp_fv_davidson(Hk__, kp__, itsol_tol__);
        }
        /* generate first-variational states */
        kp__.generate_fv_states();
        /* solve magnetic Hamiltonian */
        diagonalize_fp_sv(Hk__, kp__);
        /* generate spinor wave-functions */
        kp__.generate_spinor_wave_functions();
    } else {
        RTE_THROW("not implemented");
        //diag_full_potential_single_variation();
    }
    print_memory_usage(ctx.out(), FILE_LINE);
}

}

#endif