#ifndef __DIAGONALIZE_FP_HPP__
#define __DIAGONALIZE_FP_HPP__
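/* Free functions for diagonalizing the full-potential LAPW Hamiltonian: the exact
   (direct) generalized eigen-solver and the iterative Davidson solver for the first
   variation, plus the second-variational solver for magnetism and spin-orbit coupling. */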
inline void
diagonalize_fp_fv_exact(Hamiltonian_k<float> const&, K_point<float>&)
{
    RTE_THROW("not implemented");
}
inline void
diagonalize_fp_fv_exact(Hamiltonian_k<double> const& Hk__, K_point<double>& kp__)
{
    PROFILE("sirius::diagonalize_fp_fv_exact");

    auto& ctx = Hk__.H0().ctx();

    auto& solver = ctx.gen_evp_solver();

    /* total eigen-value problem size */
    int ngklo = kp__.gklo_basis_size();

    /* block size of the ScaLAPACK 2D block-cyclic distribution */
    int bs = ctx.cyclic_block_size();

    auto pcs = env::print_checksum();

    la::dmatrix<std::complex<double>> h(ngklo, ngklo, ctx.blacs_grid(), bs, bs, get_memory_pool(solver.host_memory_t()));
    la::dmatrix<std::complex<double>> o(ngklo, ngklo, ctx.blacs_grid(), bs, bs, get_memory_pool(solver.host_memory_t()));

    /* setup the Hamiltonian and overlap matrices */
    Hk__.set_fv_h_o(h, o);

    if (ctx.gen_evp_solver().type() == la::ev_solver_t::cusolver) {
        auto& mpd = get_memory_pool(sddk::memory_t::device);
        h.allocate(mpd);
        o.allocate(mpd);
        kp__.fv_eigen_vectors().allocate(mpd);
    }
    if (ctx.cfg().control().verification() >= 1) {
        double max_diff = check_hermitian(h, ngklo);
        if (max_diff > 1e-12) {
            std::stringstream s;
            s << "H matrix is not hermitian" << std::endl
              << "max error: " << max_diff;
            RTE_THROW(s);
        }
        max_diff = check_hermitian(o, ngklo);
        if (max_diff > 1e-12) {
            std::stringstream s;
            s << "O matrix is not hermitian" << std::endl
              << "max error: " << max_diff;
            RTE_THROW(s);
        }
    }
    if (pcs) {
        auto z1 = h.checksum(ngklo, ngklo);
        auto z2 = o.checksum(ngklo, ngklo);
        print_checksum("h_lapw", z1, ctx.out());
        print_checksum("o_lapw", z2, ctx.out());
    }
    RTE_ASSERT(kp__.gklo_basis_size() > ctx.num_fv_states());

    std::vector<double> eval(ctx.num_fv_states());

    print_memory_usage(ctx.out(), FILE_LINE);
    if (solver.solve(kp__.gklo_basis_size(), ctx.num_fv_states(), h, o, eval.data(), kp__.fv_eigen_vectors())) {
        RTE_THROW("error in generalized eigen-value problem");
    }
    print_memory_usage(ctx.out(), FILE_LINE);

    h.deallocate(sddk::memory_t::device);
    o.deallocate(sddk::memory_t::device);
    kp__.fv_eigen_vectors().deallocate(sddk::memory_t::device);

    kp__.set_fv_eigen_values(&eval[0]);
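    /* the eigen-vectors are computed in the 2D block-cyclic (BLACS) distribution;
       remap the plane-wave and muffin-tin parts to the "slab" distribution of the
       wave-functions using COSTA */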
    {
        rte::ostream out(kp__.out(4), std::string(__func__));
        for (int i = 0; i < ctx.num_fv_states(); i++) {
            out << "eval[" << i << "]=" << eval[i] << std::endl;
        }
    }

    if (pcs) {
        auto z1 = kp__.fv_eigen_vectors().checksum(kp__.gklo_basis_size(), ctx.num_fv_states());
        print_checksum("fv_eigen_vectors", z1, kp__.out(1));
    }
    {
        auto layout_in  = kp__.fv_eigen_vectors().grid_layout(0, 0, kp__.gkvec().num_gvec(), ctx.num_fv_states());
        auto layout_out = kp__.fv_eigen_vectors_slab().grid_layout_pw(wf::spin_index(0), wf::band_range(0, ctx.num_fv_states()));
        costa::transform(layout_in, layout_out, 'N', la::constant<std::complex<double>>::one(),
                         la::constant<std::complex<double>>::zero(), kp__.comm().native());
    }
    {
        auto layout_in  = kp__.fv_eigen_vectors().grid_layout(kp__.gkvec().num_gvec(), 0,
                                                              ctx.unit_cell().mt_lo_basis_size(), ctx.num_fv_states());
        auto layout_out = kp__.fv_eigen_vectors_slab().grid_layout_mt(wf::spin_index(0), wf::band_range(0, ctx.num_fv_states()));
        costa::transform(layout_in, layout_out, 'N', la::constant<std::complex<double>>::one(),
                         la::constant<std::complex<double>>::zero(), kp__.comm().native());
    }

    if (pcs) {
        auto z1 = kp__.fv_eigen_vectors_slab().checksum_pw(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, ctx.num_fv_states()));
        auto z2 = kp__.fv_eigen_vectors_slab().checksum_mt(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, ctx.num_fv_states()));
        print_checksum("fv_eigen_vectors_slab", z1 + z2, kp__.out(1));
    }
    if (ctx.valence_relativity() == relativity_t::iora) {
        std::vector<int> num_mt_coeffs(ctx.unit_cell().num_atoms());
        for (int ia = 0; ia < ctx.unit_cell().num_atoms(); ia++) {
            num_mt_coeffs[ia] = ctx.unit_cell().atom(ia).mt_lo_basis_size();
        }
        /* holds the overlap operator applied to the eigen-vectors */
        wf::Wave_functions<double> ofv_new(kp__.gkvec_sptr(), num_mt_coeffs, wf::num_mag_dims(0),
                                           wf::num_bands(ctx.num_fv_states()), sddk::memory_t::host);
        {
            auto mem = ctx.processing_unit() == sddk::device_t::CPU ? sddk::memory_t::host : sddk::memory_t::device;
            auto mg1 = kp__.fv_eigen_vectors_slab().memory_guard(mem, wf::copy_to::device);
            auto mg2 = ofv_new.memory_guard(mem, wf::copy_to::host);

            Hk__.apply_fv_h_o(false, false, wf::band_range(0, ctx.num_fv_states()), kp__.fv_eigen_vectors_slab(),
                              nullptr, &ofv_new);
        }

        auto norm1 = wf::inner_diag<double, std::complex<double>>(sddk::memory_t::host, kp__.fv_eigen_vectors_slab(),
                                                                  ofv_new, wf::spin_range(0), wf::num_bands(ctx.num_fv_states()));

        std::vector<double> norm;
        for (auto e : norm1) {
            norm.push_back(1 / std::sqrt(std::real(e)));
        }

        /* scale the eigen-vectors */
        wf::axpby<double, double>(sddk::memory_t::host, wf::spin_range(0), wf::band_range(0, ctx.num_fv_states()),
                                  nullptr, nullptr, norm.data(), &kp__.fv_eigen_vectors_slab());
    }
}
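/* Compute the lowest eigen-pairs of the LAPW overlap matrix (the "singular
   components") with the Davidson solver; they augment the trial basis of the
   first-variational Davidson solver below. */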
inline void
get_singular_components(Hamiltonian_k<double> const& Hk__, K_point<double>& kp__, double itsol_tol__)
{
    PROFILE("sirius::get_singular_components");

    auto& ctx = Hk__.H0().ctx();

    /* number of singular components */
    int ncomp = kp__.singular_components().num_wf().get();

    RTE_OUT(ctx.out(3)) << "number of singular components: " << ncomp << std::endl;

    auto& itso = ctx.cfg().iterative_solver();

    std::stringstream s;
    std::ostream* out = (kp__.comm().rank() == 0) ? &std::cout : &s;

    auto result = davidson<double, std::complex<double>, davidson_evp_t::overlap>(Hk__, kp__, wf::num_bands(ncomp),
            wf::num_mag_dims(0), kp__.singular_components(),
            [&](int i, int ispn) { return itsol_tol__; }, itso.residual_tolerance(), itso.num_steps(), itso.locking(),
            itso.subspace_size(), itso.converge_by_energy(), itso.extra_ortho(), *out, ctx.verbosity() - 2);

    RTE_OUT(kp__.out(2)) << "smallest eigen-value of the singular components: " << result.eval[0] << std::endl;
    for (int i = 0; i < ncomp; i++) {
        RTE_OUT(kp__.out(3)) << "singular component eigen-value[" << i << "]=" << result.eval[i] << std::endl;
    }
}
inline void
diagonalize_fp_fv_davidson(Hamiltonian_k<float> const&, K_point<float>&, double)
{
    RTE_THROW("not implemented");
}
inline void
diagonalize_fp_fv_davidson(Hamiltonian_k<double> const& Hk__, K_point<double>& kp__, double itsol_tol__)
{
    PROFILE("sirius::diagonalize_fp_fv_davidson");

    auto& ctx = Hk__.H0().ctx();

    auto& itso = ctx.cfg().iterative_solver();

    /* number of singular components */
    int ncomp = kp__.singular_components().num_wf().get();

    if (ncomp) {
        /* compute the eigen-vectors of the singular components of the overlap matrix */
        get_singular_components(Hk__, kp__, itsol_tol__);
    }

    /* total number of local orbitals */
    int nlo = ctx.unit_cell().mt_lo_basis_size();

    /* auxiliary wave-functions: singular components plus pure local orbitals */
    auto phi_extra_new = wave_function_factory(ctx, kp__, wf::num_bands(nlo + ncomp), wf::num_mag_dims(0), true);
    phi_extra_new->zero(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, nlo + ncomp));

    /* copy the singular components to the auxiliary wave-functions */
    wf::copy(sddk::memory_t::host, kp__.singular_components(), wf::spin_index(0), wf::band_range(0, ncomp),
             *phi_extra_new, wf::spin_index(0), wf::band_range(0, ncomp));
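    /* the remaining auxiliary functions are the pure local orbitals: each local
       orbital gets a unit muffin-tin coefficient in its own band, offset by ncomp
       to skip the singular components */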
    /* offset_lo[ia]: number of local orbitals of all atoms before atom ia */
    std::vector<int> offset_lo(ctx.unit_cell().num_atoms());
    std::generate(offset_lo.begin(), offset_lo.end(),
            [n = 0, ia = 0, &ctx] () mutable {
                int offs = n;
                n += ctx.unit_cell().atom(ia++).mt_lo_basis_size();
                return offs;
            });

    for (auto it : phi_extra_new->spl_num_atoms()) {
        for (int xi = 0; xi < ctx.unit_cell().atom(it.i).mt_lo_basis_size(); xi++) {
            phi_extra_new->mt_coeffs(xi, it.li, wf::spin_index(0),
                                     wf::band_index(offset_lo[it.i] + xi + ncomp)) = 1.0;
        }
    }
    if (env::print_checksum()) {
        auto cs = phi_extra_new->checksum(sddk::memory_t::host, wf::band_range(0, nlo + ncomp));
        if (kp__.comm().rank() == 0) {
            print_checksum("phi_extra", cs, RTE_OUT(ctx.out()));
        }
    }
    /* convergence tolerance for a given band */
    auto tolerance = [&](int j__, int ispn__) -> double {
        return itsol_tol__;
    };

    std::stringstream s;
    std::ostream* out = (kp__.comm().rank() == 0) ? &std::cout : &s;

    auto result = davidson<double, std::complex<double>, davidson_evp_t::hamiltonian>(Hk__, kp__,
            wf::num_bands(ctx.num_fv_states()), wf::num_mag_dims(0), kp__.fv_eigen_vectors_slab(), tolerance,
            itso.residual_tolerance(), itso.num_steps(), itso.locking(), itso.subspace_size(),
            itso.converge_by_energy(), itso.extra_ortho(), *out, ctx.verbosity() - 2,
            phi_extra_new.get());

    kp__.set_fv_eigen_values(&result.eval[0]);
}
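/* Second variation: diagonalize the magnetic (and spin-orbit) part of the
   Hamiltonian in the basis of the first-variational states. */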
inline void
diagonalize_fp_sv(Hamiltonian_k<float> const&, K_point<float>&)
{
    RTE_THROW("not implemented");
}
inline void
diagonalize_fp_sv(Hamiltonian_k<double> const& Hk__, K_point<double>& kp)
{
    PROFILE("sirius::diagonalize_fp_sv");

    auto& ctx = Hk__.H0().ctx();

    if (!ctx.need_sv()) {
        kp.bypass_sv();
        return;
    }

    auto pcs = env::print_checksum();

    int nfv = ctx.num_fv_states();
    int bs  = ctx.cyclic_block_size();

    sddk::mdarray<double, 2> band_energies(ctx.num_bands(), ctx.num_spinors());

    std::vector<int> num_mt_coeffs(ctx.unit_cell().num_atoms());
    for (int ia = 0; ia < ctx.unit_cell().num_atoms(); ia++) {
        num_mt_coeffs[ia] = ctx.unit_cell().atom(ia).mt_basis_size();
    }
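    /* hpsi[i] stores the application of the i-th magnetic component of the
       Hamiltonian to the first-variational states */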
    std::vector<wf::Wave_functions<double>> hpsi;
    for (int i = 0; i < ctx.num_mag_comp(); i++) {
        hpsi.push_back(wf::Wave_functions<double>(kp.gkvec_sptr(), num_mt_coeffs, wf::num_mag_dims(0),
                                                  wf::num_bands(nfv), ctx.host_memory_t()));
    }

    if (pcs) {
        auto cs1 = kp.fv_states().checksum_pw(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, nfv));
        auto cs2 = kp.fv_states().checksum_mt(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, nfv));
        if (kp.comm().rank() == 0) {
            print_checksum("psi_pw", cs1, RTE_OUT(ctx.out()));
            print_checksum("psi_mt", cs2, RTE_OUT(ctx.out()));
        }
    }

    /* apply the magnetic field to the first-variational states */
    if (ctx.num_spins() == 2) {
        Hk__.apply_b(kp.fv_states(), hpsi);
    } else {
        hpsi[0].zero(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, nfv));
    }

    print_memory_usage(ctx.out(), FILE_LINE);
    /* apply the spin-orbit correction to the first-variational states */
    if (ctx.so_correction()) {
        Hk__.apply_so_correction(kp.fv_states(), hpsi);
    }

    std::vector<wf::device_memory_guard> mg;
    mg.emplace_back(kp.fv_states().memory_guard(ctx.processing_unit_memory_t(), wf::copy_to::device));
    for (int i = 0; i < ctx.num_mag_comp(); i++) {
        mg.emplace_back(hpsi[i].memory_guard(ctx.processing_unit_memory_t(), wf::copy_to::device));
    }

    print_memory_usage(ctx.out(), FILE_LINE);

    auto& std_solver = ctx.std_evp_solver();

    auto br = wf::band_range(0, nfv);
    auto sr = wf::spin_range(0);

    auto mem = ctx.processing_unit_memory_t();
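    /* For collinear magnetism (num_mag_dims != 3) the up-up and dn-dn blocks are
       decoupled and diagonalized one after the other. In the non-collinear case a
       single matrix of twice the first-variational size is assembled from the
       up-up, dn-dn and up-dn blocks (the dn-up block follows from hermiticity).
       In both cases the first-variational eigen-values enter on the diagonal. */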
    if (ctx.num_mag_dims() != 3) {
        la::dmatrix<std::complex<double>> h(nfv, nfv, ctx.blacs_grid(), bs, bs);
        if (ctx.blacs_grid().comm().size() == 1 && ctx.processing_unit() == sddk::device_t::GPU) {
            h.allocate(get_memory_pool(sddk::memory_t::device));
        }
        /* perform one or two consecutive diagonalizations */
        for (int ispn = 0; ispn < ctx.num_spins(); ispn++) {
            if (pcs) {
                auto cs1 = hpsi[ispn].checksum_pw(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, nfv));
                auto cs2 = hpsi[ispn].checksum_mt(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, nfv));
                if (kp.comm().rank() == 0) {
                    std::stringstream s1;
                    s1 << "hpsi_pw_" << ispn;
                    print_checksum(s1.str(), cs1, RTE_OUT(ctx.out()));
                    std::stringstream s2;
                    s2 << "hpsi_mt_" << ispn;
                    print_checksum(s2.str(), cs2, RTE_OUT(ctx.out()));
                }
            }
            /* compute <wf_i | h * wf_j> */
            wf::inner(ctx.spla_context(), mem, sr, kp.fv_states(), br, hpsi[ispn], br, h, 0, 0);

            /* add the first-variational eigen-values on the diagonal */
            for (int i = 0; i < nfv; i++) {
                h.add(i, i, kp.fv_eigen_value(i));
            }
            {
                PROFILE("sirius::diagonalize_fp_sv|stdevp");
                std_solver.solve(nfv, nfv, h, &band_energies(0, ispn), kp.sv_eigen_vectors(ispn));
            }
        }
    } else {
        int nb = ctx.num_bands();
        la::dmatrix<std::complex<double>> h(nb, nb, ctx.blacs_grid(), bs, bs);
        if (ctx.blacs_grid().comm().size() == 1 && ctx.processing_unit() == sddk::device_t::GPU) {
            h.allocate(get_memory_pool(sddk::memory_t::device));
        }
        /* compute <wf_i | h * wf_j> for the up-up block */
        wf::inner(ctx.spla_context(), mem, sr, kp.fv_states(), br, hpsi[0], br, h, 0, 0);
        /* compute <wf_i | h * wf_j> for the dn-dn block */
        wf::inner(ctx.spla_context(), mem, sr, kp.fv_states(), br, hpsi[1], br, h, nfv, nfv);
        /* compute <wf_i | h * wf_j> for the up-dn block */
        wf::inner(ctx.spla_context(), mem, sr, kp.fv_states(), br, hpsi[2], br, h, 0, nfv);

        /* fill the dn-up block from hermiticity */
        if (kp.comm().size() == 1) {
            for (int i = 0; i < nfv; i++) {
                for (int j = 0; j < nfv; j++) {
                    h(nfv + j, i) = std::conj(h(i, nfv + j));
                }
            }
        } else {
            la::wrap(la::lib_t::scalapack).tranc(nfv, nfv, h, 0, nfv, h, nfv, 0);
        }

        /* add the first-variational eigen-values on the diagonal */
        for (int i = 0; i < nfv; i++) {
            h.add(i, i, kp.fv_eigen_value(i));
            h.add(i + nfv, i + nfv, kp.fv_eigen_value(i));
        }
        {
            PROFILE("sirius::diagonalize_fp_sv|stdevp");
            std_solver.solve(nb, nb, h, &band_energies(0, 0), kp.sv_eigen_vectors(0));
        }
    }
    for (int ispn = 0; ispn < ctx.num_spinors(); ispn++) {
        for (int j = 0; j < ctx.num_bands(); j++) {
            kp.band_energy(j, ispn, band_energies(j, ispn));
        }
    }
}
template <typename T>
inline void
diagonalize_fp(Hamiltonian_k<T> const& Hk__, K_point<T>& kp__, double itsol_tol__)
{
    auto& ctx = Hk__.H0().ctx();
    print_memory_usage(ctx.out(), FILE_LINE);
    if (ctx.cfg().control().use_second_variation()) {
        /* solve the non-magnetic Hamiltonian (so-called first variation) */
        auto& itso = ctx.cfg().iterative_solver();
        if (itso.type() == "exact") {
            diagonalize_fp_fv_exact(Hk__, kp__);
        } else if (itso.type() == "davidson") {
            diagonalize_fp_fv_davidson(Hk__, kp__, itsol_tol__);
        }
        /* generate first-variational states */
        kp__.generate_fv_states();
        /* solve the magnetic Hamiltonian (second variation) */
        diagonalize_fp_sv(Hk__, kp__);
        /* generate spinor wave-functions */
        kp__.generate_spinor_wave_functions();
    } else {
        RTE_THROW("not implemented");
    }
    print_memory_usage(ctx.out(), FILE_LINE);
}

#endif // __DIAGONALIZE_FP_HPP__