25#ifndef __DAVIDSON_HPP__
26#define __DAVIDSON_HPP__
48enum class davidson_evp_t {
53template <
typename T,
typename F>
55project_out_subspace(::spla::Context& spla_ctx__, sddk::memory_t mem__, wf::spin_range spins__,
56 wf::Wave_functions<T>& phi__, wf::Wave_functions<T>& sphi__,
int N__,
int n__, la::dmatrix<F>& o__)
58 PROFILE(
"sirius::project_out_subspace");
62 wf::inner(spla_ctx__, mem__, spins__, sphi__, wf::band_range(0, N__), phi__, wf::band_range(N__, N__ + n__),
64 for (
auto s = spins__.begin(); s != spins__.end(); s++) {
65 auto sp = phi__.actual_spin_index(s);
66 wf::transform<T, F>(spla_ctx__, mem__, o__, 0, 0, -1.0, phi__, sp, wf::band_range(0, N__), 1.0,
67 phi__, sp, wf::band_range(N__, N__ + n__));
139template <
typename T,
typename F, dav
idson_evp_t what>
143 int num_steps__,
bool locking__,
int subspace_size__,
bool estimate_eval__,
bool extra_ortho__,
146 PROFILE(
"sirius::davidson");
148 PROFILE_START(
"sirius::davidson|init");
149 auto& ctx = Hk__.H0().ctx();
150 print_memory_usage(out__, FILE_LINE);
152 auto pcs = env::print_checksum();
155 auto& itso = ctx.cfg().iterative_solver();
158 const bool nc_mag = (num_mag_dims__.get() == 3);
166 const int num_sc = nc_mag ? 2 : 1;
169 const int num_spinors = (num_mag_dims__.get() == 1) ? 2 : 1;
172 const int num_spins = (num_mag_dims__.get() == 0) ? 1 : 2;
175 int num_phi = subspace_size__ * num_bands__.get();
176 int num_extra_phi{0};
179 num_extra_phi = phi_extra__->num_wf().get();
180 num_phi += num_extra_phi;
182 nlo = ctx.unit_cell().mt_lo_basis_size();
187 s <<
"subspace size is too large!";
192 auto& mp = get_memory_pool(ctx.host_memory_t());
202 if (ctx.full_potential()) {
206 std::vector<wf::device_memory_guard> mg;
208 mg.emplace_back(psi__.
memory_guard(mem, wf::copy_to::device | wf::copy_to::host));
211 auto phi = wave_function_factory(ctx, kp__,
wf::num_bands(num_phi), num_md, mt_part);
212 mg.emplace_back(phi->memory_guard(mem));
215 std::unique_ptr<wf_t> hphi{
nullptr};
216 if (what == davidson_evp_t::hamiltonian) {
217 hphi = wave_function_factory(ctx, kp__,
wf::num_bands(num_phi), num_md, mt_part);
218 mg.emplace_back(hphi->memory_guard(mem));
222 auto sphi = wave_function_factory(ctx, kp__,
wf::num_bands(num_phi), num_md, mt_part);
223 mg.emplace_back(sphi->memory_guard(mem));
226 std::unique_ptr<wf_t> hpsi{
nullptr};
227 if (what == davidson_evp_t::hamiltonian) {
228 hpsi = wave_function_factory(ctx, kp__, num_bands__, num_md, mt_part);
229 mg.emplace_back(hpsi->memory_guard(mem));
233 auto spsi = wave_function_factory(ctx, kp__, num_bands__, num_md, mt_part);
234 mg.emplace_back(spsi->memory_guard(mem));
239 auto res = wave_function_factory(ctx, kp__,
wf::num_bands(num_bands__.get() + num_extra_phi), num_md, mt_part);
240 mg.emplace_back(res->memory_guard(mem));
242 std::unique_ptr<wf_t> hphi_extra{
nullptr};
243 std::unique_ptr<wf_t> sphi_extra{
nullptr};
246 hphi_extra = wave_function_factory(ctx, kp__,
wf::num_bands(num_extra_phi), num_md, mt_part);
247 sphi_extra = wave_function_factory(ctx, kp__,
wf::num_bands(num_extra_phi), num_md, mt_part);
248 mg.emplace_back(phi_extra__->memory_guard(mem, wf::copy_to::device));
249 mg.emplace_back(hphi_extra->memory_guard(mem));
250 mg.emplace_back(sphi_extra->memory_guard(mem));
252 auto cs = phi_extra__->checksum(mem,
wf::band_range(0, num_extra_phi));
253 print_checksum(
"phi_extra", cs, RTE_OUT(out__));
257 int const bs = ctx.cyclic_block_size();
260 la::dmatrix<F> H_old(num_phi, num_phi, ctx.blacs_grid(), bs, bs, mp);
261 la::dmatrix<F> evec(num_phi, num_phi, ctx.blacs_grid(), bs, bs, mp);
263 int const num_ortho_steps = extra_ortho__ ? 2 : 1;
266 auto& mpd = get_memory_pool(mem);
267 if (ctx.blacs_grid().comm().size() == 1) {
273 print_memory_usage(out__, FILE_LINE);
276 auto h_o_diag = (ctx.full_potential()) ?
277 Hk__.template get_h_o_diag_lapw<3>() : Hk__.template get_h_o_diag_pw<T, 3>();
283 case davidson_evp_t::hamiltonian: {
284 h_diag = &h_o_diag.first;
285 o_diag = &h_o_diag.second;
288 case davidson_evp_t::overlap: {
289 h_diag = &h_o_diag.second;
290 o_diag = &h_o_diag.first;
291 *o_diag = [](){
return 1.0;};
301 auto cs1 = h_o_diag.first.checksum();
302 auto cs2 = h_o_diag.second.checksum();
303 kp__.comm().allreduce(&cs1, 1);
304 kp__.comm().allreduce(&cs2, 1);
305 print_checksum(
"h_diag", cs1, RTE_OUT(out__));
306 print_checksum(
"o_diag", cs2, RTE_OUT(out__));
307 auto cs = psi__.checksum(mem,
wf::band_range(0, num_bands__.get()));
308 print_checksum(
"input spinor_wave_functions", cs, RTE_OUT(out__));
311 auto& std_solver = ctx.std_evp_solver();
315 if (verbosity__ >= 1) {
316 RTE_OUT(out__) <<
"starting Davidson iterative solver" << std::endl
317 <<
" number of bands : " << num_bands__.get() << std::endl
318 <<
" subspace size : " << num_phi << std::endl
319 <<
" locking : " << locking__ << std::endl
320 <<
" number of spins : " <<
num_spins << std::endl
321 <<
" non-collinear : " << nc_mag << std::endl
322 <<
" number of extra phi : " << num_extra_phi << std::endl;
324 PROFILE_STOP(
"sirius::davidson|init");
326 PROFILE_START(
"sirius::davidson|iter");
327 for (
int ispin_step = 0; ispin_step < num_spinors; ispin_step++) {
328 if (verbosity__ >= 1) {
329 RTE_OUT(out__) <<
"ispin_step " << ispin_step <<
" out of " << num_spinors << std::endl;
333 print_memory_usage(out__, FILE_LINE);
342 auto is_converged = [&](
int j__,
int ispn__) ->
bool {
343 return std::abs(eval[j__] - eval_old[j__]) <= tolerance__(j__ + num_locked, ispn__);
350 eval_old = []() {
return 1e10; };
354 for (
int ispn = 0; ispn < num_sc; ispn++) {
362 if (num_mag_dims__.get() != 0) {
363 RTE_THROW(
"not supported");
371 auto cs = phi_extra__->checksum(mem,
wf::band_range(0, num_extra_phi));
372 print_checksum(
"extra phi", cs, RTE_OUT(out__));
374 auto cs = phi->checksum(mem,
wf::band_range(0, num_bands__.get()));
375 print_checksum(
"input phi", cs, RTE_OUT(out__));
379 int N = num_bands__.get() + num_extra_phi;
384 if (verbosity__ >= 1) {
385 RTE_OUT(out__) <<
"apply Hamiltonian to phi(0:" << N - 1 <<
")" << std::endl;
390 case davidson_evp_t::hamiltonian: {
391 if (ctx.full_potential()) {
406 Hk__.template apply_h_s<F>(sr,
wf::band_range(0, num_bands__.get()), *phi, hphi.get(), sphi.get());
410 case davidson_evp_t::overlap: {
411 if (ctx.full_potential()) {
414 Hk__.template apply_h_s<F>(sr,
wf::band_range(0, num_bands__.get()), *phi,
nullptr, sphi.get());
421 if (ctx.cfg().control().verification() >= 1) {
423 if (what != davidson_evp_t::overlap) {
425 auto max_diff = check_hermitian(H, N);
426 if (max_diff > (std::is_same<T, double>::value ? 1e-12 : 1e-6)) {
428 s <<
"H matrix is not Hermitian, max_err = " << max_diff << std::endl
429 <<
" happened before entering the iterative loop" << std::endl;
432 auto s1 = H.serialize(
"davidson:H_first", N, N);
433 if (kp__.comm().rank() == 0) {
434 RTE_OUT(out__) << s1.str() << std::endl;
441 auto max_diff = check_hermitian(H, N);
442 if (max_diff > (std::is_same<T, double>::value ? 1e-12 : 1e-6)) {
444 s <<
"O matrix is not Hermitian, max_err = " << max_diff << std::endl
445 <<
" happened before entering the iterative loop" << std::endl;
448 auto s1 = H.serialize(
"davidson:O_first", N, N);
449 if (kp__.comm().rank() == 0) {
450 RTE_OUT(out__) << s1.str() << std::endl;
459 print_checksum(
"phi", cs, RTE_OUT(out__));
462 print_checksum(
"hphi", cs, RTE_OUT(out__));
465 print_checksum(
"sphi", cs, RTE_OUT(out__));
468 if (verbosity__ >= 1) {
469 RTE_OUT(out__) <<
"orthogonalize " << N <<
" states" << std::endl;
474 case davidson_evp_t::hamiltonian: {
477 *phi, *sphi, {phi.get(), hphi.get(), sphi.get()}, H, *res,
false);
481 print_checksum(
"phi", cs, RTE_OUT(out__));
484 print_checksum(
"hphi", cs, RTE_OUT(out__));
487 print_checksum(
"sphi", cs, RTE_OUT(out__));
493 case davidson_evp_t::overlap: {
496 {phi.get(), sphi.get()}, H, *res,
false);
503 if (verbosity__ >= 1) {
504 RTE_OUT(out__) <<
"set " << N <<
" x " << N <<
" subspace matrix" << std::endl;
508 if (ctx.cfg().control().verification() >= 1) {
509 auto max_diff = check_hermitian(H, N);
510 if (max_diff > (std::is_same<real_type<F>,
double>::value ? 1e-12 : 1e-6)) {
512 s <<
"H matrix is not Hermitian, max_err = " << max_diff << std::endl
513 <<
" happened before entering the iterative loop" << std::endl;
516 auto s1 = H.serialize(
"davidson:H", N, N);
517 if (kp__.comm().rank() == 0) {
518 RTE_OUT(out__) << s1.str() << std::endl;
526 int block_size = num_bands__.get();
528 if (verbosity__ >= 1) {
529 RTE_OUT(out__) <<
"diagonalize " << N <<
" x " << N <<
" Hamiltonian" << std::endl;
533 if (std_solver.solve(N, num_bands__.get(), H, &eval[0], evec)) {
535 s <<
"error in diagonalziation";
539 ctx.evp_work_count(1);
541 if (verbosity__ >= 4) {
542 for (
int i = 0; i < num_bands__.get(); i++) {
543 RTE_OUT(out__) <<
"eval[" << i <<
"]=" << eval[i] << std::endl;
551 F current_frobenius_norm{0};
554 for (
int iter_step = 0; iter_step < num_steps__; iter_step++) {
555 if (verbosity__ >= 1) {
556 RTE_OUT(out__) <<
"iter_step " << iter_step <<
" out of " << num_steps__ << std::endl;
561 bool last_iteration = iter_step == (num_steps__ - 1);
563 int num_ritz = num_bands__.get() - num_locked;
566 if (!last_iteration) {
567 if (verbosity__ >= 1) {
568 RTE_OUT(out__) <<
"compute " << num_bands__.get() - num_locked
569 <<
" residuals from phi(" << num_locked <<
":" << N - 1 <<
")" << std::endl;
574 case davidson_evp_t::hamiltonian: {
575 rres = residuals<T, F>(ctx, mem, sr, N, num_ritz, num_locked, eval, evec, *hphi, *sphi,
576 *hpsi, *spsi, *res, *h_diag, *o_diag, estimate_eval__, res_tol__, is_converged);
580 case davidson_evp_t::overlap: {
581 rres = residuals<T, F>(ctx, mem, sr, N, num_ritz, num_locked, eval, evec, *sphi, *phi, *spsi,
582 psi__, *res, *h_diag, *o_diag, estimate_eval__, res_tol__, is_converged);
586 result.num_unconverged[ispin_step] = rres.unconverged_residuals;
587 num_lockable = rres.num_consecutive_smallest_converged;
588 current_frobenius_norm = rres.frobenius_norm;
595 if (verbosity__ >= 1) {
596 RTE_OUT(out__) <<
"number of unconverged residuals : " << result.num_unconverged[ispin_step]
598 RTE_OUT(out__) <<
"current_frobenius_norm : " << current_frobenius_norm << std::endl;
605 auto cs = res->checksum(mem, br);
606 print_checksum(
"res_pw", cs_pw, RTE_OUT(out__));
607 print_checksum(
"res_mt", cs_mt, RTE_OUT(out__));
608 print_checksum(
"res", cs, RTE_OUT(out__));
613 int num_converged = num_ritz - result.num_unconverged[ispin_step];
615 bool converged_by_relative_tol{
true};
623 bool converged_by_absolute_tol = (num_locked + num_converged - itso.min_num_res()) >= num_bands__.get();
625 bool converged = converged_by_relative_tol && converged_by_absolute_tol;
629 int expand_with = std::min(result.num_unconverged[ispin_step], block_size);
630 bool should_restart = (N + expand_with > num_phi) ||
631 (num_lockable > 5 && result.num_unconverged[ispin_step] <
632 itso.early_restart() * num_lockable);
634 if (verbosity__ >= 3) {
635 RTE_OUT(out__) <<
"restart=" << (should_restart ?
"yes" :
"no") <<
", locked=" << num_locked
636 <<
", converged=" << num_converged <<
", wanted=" << num_bands__
637 <<
", lockable=" << num_lockable <<
", num_ritz=" << num_ritz
638 <<
", expansion size=" << expand_with << std::endl;
642 if (should_restart || converged || last_iteration) {
643 PROFILE(
"sirius::davidson|update_phi");
646 for (
auto s = sr.begin(); s != sr.end(); s++) {
647 auto sp = phi->actual_spin_index(s);
648 wf::transform(ctx.spla_context(), mem, evec, 0, 0, 1.0, *phi, sp,
wf::band_range(num_locked, N),
649 0.0, psi__, s,
wf::band_range(num_locked, num_locked + num_ritz));
653 for (
int j = num_locked; j < num_bands__.get(); j++) {
654 result.eval(j, ispin_step) = eval[j - num_locked];
657 if (last_iteration && !converged && verbosity__ >= 3) {
658 RTE_OUT(out__) <<
"Warning: maximum number of iterations reached, but "
659 << result.num_unconverged[ispin_step]
660 <<
" residual(s) did not converge for k-point "
661 << kp__.vk()[0] <<
" " << kp__.vk()[1] <<
" " << kp__.vk()[2] << std::endl;
662 result.converged =
false;
666 if (converged || last_iteration) {
667 if (verbosity__ >= 3) {
668 RTE_OUT(out__) <<
"end of iterative diagonalization; num_unconverged: "
669 << result.num_unconverged[ispin_step]
670 <<
", iteration step: " << iter_step << std::endl;
674 if (verbosity__ >= 3) {
675 RTE_OUT(out__) <<
"subspace size limit reached" << std::endl;
680 if (estimate_eval__) {
681 for (
auto s = sr.begin(); s != sr.end(); s++) {
682 auto sp = sphi->actual_spin_index(s);
684 case davidson_evp_t::hamiltonian: {
685 wf::transform(ctx.spla_context(), mem, evec, 0, 0, 1.0, *hphi, sp,
691 case davidson_evp_t::overlap: {
692 wf::transform(ctx.spla_context(), mem, evec, 0, 0, 1.0, *sphi, sp,
702 int keep = num_bands__.get();
705 for (
auto s = sr.begin(); s != sr.end(); s++) {
706 auto sp = phi->actual_spin_index(s);
710 if (what == davidson_evp_t::hamiltonian) {
716 if (locking__ && num_lockable > 0) {
717 for (
int i = num_lockable; i < num_ritz; ++i) {
718 eval[i - num_lockable] = eval[i];
724 for (
int i = 0; i < keep - num_locked; i++) {
725 H_old.set(i, i, eval[i]);
733 num_locked += num_lockable;
739 for (
auto s = sr.begin(); s != sr.end(); s++) {
740 auto sp = phi->actual_spin_index(s);
743 if (should_restart && phi_extra__) {
746 expand_with += num_extra_phi;
748 if (verbosity__ >= 1) {
749 RTE_OUT(out__) <<
"expanding subspace of size " << N <<
" with " << expand_with
750 <<
" new basis functions" << std::endl;
753 if (verbosity__ >= 1) {
754 RTE_OUT(out__) <<
"project out " << N <<
" existing basis states" << std::endl;
763 case davidson_evp_t::hamiltonian: {
764 if (ctx.full_potential()) {
765 if (should_restart && phi_extra__) {
768 *phi, hphi.get(), sphi.get());
777 *phi, *sphi, {phi.get(), hphi.get(), sphi.get()}, H, *res,
true);
781 project_out_subspace<T, F>(ctx.spla_context(), mem, sr, *phi, *sphi, N, expand_with, H);
782 Hk__.template apply_h_s<F>(sr,
wf::band_range(N, N + expand_with), *phi, hphi.get(), sphi.get());
783 for (
int j = 0; j < num_ortho_steps; j++) {
785 *phi, *sphi, {phi.get(), hphi.get(), sphi.get()}, H, *res,
false);
788 generate_subspace_matrix<T, F>(ctx, N, expand_with, num_locked, *phi, *hphi, H, &H_old);
791 case davidson_evp_t::overlap: {
792 project_out_subspace(ctx.spla_context(), mem, sr, *phi, *phi, N, expand_with, H);
793 if (ctx.full_potential()) {
796 Hk__.template apply_h_s<F>(sr,
wf::band_range(N, N + expand_with), *phi,
nullptr, sphi.get());
798 for (
int j = 0; j < num_ortho_steps; j++) {
800 *phi, *phi, {phi.get(), sphi.get()}, H, *res,
false);
802 generate_subspace_matrix<T, F>(ctx, N, expand_with, num_locked, *phi, *sphi, H, &H_old);
807 if (verbosity__ >= 3) {
808 RTE_OUT(out__) <<
"orthogonalized " << expand_with <<
" to " << N << std::endl;
811 if (ctx.cfg().control().verification() >= 1) {
812 auto max_diff = check_hermitian(H, N + expand_with - num_locked);
813 if (max_diff > (std::is_same<T, double>::value ? 1e-12 : 1e-6)) {
815 if (verbosity__ >= 1) {
816 RTE_OUT(out__) <<
"H matrix of size " << N + expand_with - num_locked
817 <<
" is not Hermitian, maximum error: " << max_diff << std::endl;
828 if (verbosity__ >= 3) {
829 RTE_OUT(out__) <<
"computing " << num_bands__.get() <<
" pre-Ritz pairs" << std::endl;
832 if (std_solver.solve(N - num_locked, num_bands__.get() - num_locked, H, &eval[0], evec)) {
834 s <<
"error in diagonalziation";
838 ctx.evp_work_count(std::pow(
static_cast<double>(N - num_locked) / num_bands__.get(), 3));
840 if (verbosity__ >= 3) {
841 RTE_OUT(out__) <<
"step: " << iter_step <<
", current subspace size:" << N
842 <<
", maximum subspace size: " << num_phi << std::endl;
844 if (verbosity__ >= 4) {
845 for (
int i = 0; i < num_bands__.get() - num_locked; i++) {
846 RTE_OUT(out__) <<
"eval[" << i <<
"]=" << eval[i]
847 <<
", diff=" << std::abs(eval[i] - eval_old[i]) << std::endl;
853 print_memory_usage(out__, FILE_LINE);
855 PROFILE_STOP(
"sirius::davidson|iter");
858 print_memory_usage(out__, FILE_LINE);
Representation of Kohn-Sham Hamiltonian.
void apply_fv_h_o(bool apw_only__, bool phi_is_lo__, wf::band_range b__, wf::Wave_functions< T > &phi__, wf::Wave_functions< T > *hphi__, wf::Wave_functions< T > *ophi__) const
Apply first-variational LAPW Hamiltonian and overlap matrices.
K-point related variables and methods.
int gklo_basis_size() const
Basis size of LAPW+lo method.
void copy_to(memory_t mem__, size_t idx0__, size_t n__, acc::stream_id sid=acc::stream_id(-1))
Copy n elements starting from idx0 from one memory type to another.
void zero(memory_t mem__, size_t idx0__, size_t n__)
Zero n elements starting from idx0.
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
auto memory_guard(sddk::memory_t mem__, wf::copy_to copy_to__=copy_to::none) const
Return an instance of the memory guard.
Wave-functions representation.
Describe a range of bands.
Describe a range of spins.
Generate subspace-matrix in the auxiliary basis |phi>
Contains declaration and definition of sirius::Hamiltonian class.
Contains definition of sirius::K_point class.
bool is_device_memory(memory_t mem__)
Check if this is a valid device memory (memory, accessible by the device).
memory_t
Memory types where the code can store data.
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
int orthogonalize(::spla::Context &spla_ctx__, sddk::memory_t mem__, spin_range spins__, band_range br_old__, band_range br_new__, Wave_functions< T > const &wf_i__, Wave_functions< T > const &wf_j__, std::vector< Wave_functions< T > * > wfs__, la::dmatrix< F > &o__, Wave_functions< T > &tmp__, bool project_out__)
Orthogonalize n new wave-functions to the N old wave-functions.
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > inner(::spla::Context &spla_ctx__, sddk::memory_t mem__, spin_range spins__, W const &wf_i__, band_range br_i__, Wave_functions< T > const &wf_j__, band_range br_j__, la::dmatrix< F > &result__, int irow0__, int jcol0__)
Compute inner product between the two sets of wave-functions.
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > transform(::spla::Context &spla_ctx__, sddk::memory_t mem__, la::dmatrix< F > const &M__, int irow0__, int jcol0__, real_type< F > alpha__, Wave_functions< T > const &wf_in__, spin_index s_in__, band_range br_in__, real_type< F > beta__, Wave_functions< T > &wf_out__, spin_index s_out__, band_range br_out__)
Apply linear transformation to the wave-functions.
void copy(sddk::memory_t mem__, Wave_functions< T > const &in__, wf::spin_index s_in__, wf::band_range br_in__, Wave_functions< F > &out__, wf::spin_index s_out__, wf::band_range br_out__)
Copy wave-functions.
Namespace of the SIRIUS library.
void generate_subspace_matrix(Simulation_context &ctx__, int N__, int n__, int num_locked__, wf::Wave_functions< T > &phi__, wf::Wave_functions< T > &op_phi__, la::dmatrix< F > &mtrx__, la::dmatrix< F > *mtrx_old__=nullptr)
Generate subspace matrix for the iterative diagonalization.
auto davidson(Hamiltonian_k< T > const &Hk__, K_point< T > &kp__, wf::num_bands num_bands__, wf::num_mag_dims num_mag_dims__, wf::Wave_functions< T > &psi__, std::function< double(int, int)> tolerance__, double res_tol__, int num_steps__, bool locking__, int subspace_size__, bool estimate_eval__, bool extra_ortho__, std::ostream &out__, int verbosity__, wf::Wave_functions< T > *phi_extra__=nullptr)
Solve the eigen-problem using Davidson iterative method.
Compute residuals from the eigen-vectors and basis functions.
Result of Davidson solver.
bool converged
True if all bands (up and dn) are converged.
int num_unconverged[2]
Number of unconverged bands for each spin channel.
int niter
Number of iterations.
sddk::mdarray< double, 2 > eval
Eigen-values.