24#ifndef __DIAGONALIZE_PP_HPP__
25#define __DIAGONALIZE_PP_HPP__
33template <
typename T,
typename F>
34inline std::enable_if_t<!std::is_same<T, real_type<F>>::value,
void>
35diagonalize_pp_exact(
int ispn__, Hamiltonian_k<T>
const& Hk__, K_point<T>& kp)
37 RTE_THROW(
"not implemented");
40template <
typename T,
typename F>
41inline std::enable_if_t<std::is_same<T, real_type<F>>::value,
void>
42diagonalize_pp_exact(
int ispn__, Hamiltonian_k<T>
const& Hk__, K_point<T>& kp__)
44 PROFILE(
"sirius::diagonalize_pp_exact");
46 auto& ctx = Hk__.H0().ctx();
48 if (ctx.gamma_point()) {
49 RTE_THROW(
"exact diagonalization for Gamma-point case is not implemented");
52 const int bs = ctx.cyclic_block_size();
53 la::dmatrix<F> hmlt(kp__.num_gkvec(), kp__.num_gkvec(), ctx.blacs_grid(), bs, bs);
54 la::dmatrix<F> ovlp(kp__.num_gkvec(), kp__.num_gkvec(), ctx.blacs_grid(), bs, bs);
55 la::dmatrix<F> evec(kp__.num_gkvec(), kp__.num_gkvec(), ctx.blacs_grid(), bs, bs);
56 std::vector<real_type<F>> eval(kp__.num_gkvec());
61 auto& gen_solver = ctx.gen_evp_solver();
63 for (
int ig = 0; ig < kp__.num_gkvec(); ig++) {
65 0.5 * std::pow(kp__.gkvec().template gkvec_cart<index_domain_t::global>(ig).length(), 2));
69 auto veff = Hk__.H0().potential().effective_potential().rg().gather_f_pw();
70 std::vector<std::complex<double>> beff;
71 if (ctx.num_mag_dims() == 1) {
72 beff = Hk__.H0().potential().effective_magnetic_field(0).rg().gather_f_pw();
73 for (
int ig = 0; ig < ctx.gvec().num_gvec(); ig++) {
81 #pragma omp parallel for schedule(static)
82 for (
int igk_col = 0; igk_col < kp__.num_gkvec_col(); igk_col++) {
83 auto gvec_col = kp__.gkvec_col().template gvec<index_domain_t::local>(igk_col);
84 for (
int igk_row = 0; igk_row < kp__.num_gkvec_row(); igk_row++) {
85 auto gvec_row = kp__.gkvec_row().template gvec<index_domain_t::local>(igk_row);
86 auto ig12 = ctx.gvec().index_g12_safe(gvec_row, gvec_col);
90 hmlt(igk_row, igk_col) +=
std::conj(veff[ig12.first]);
92 hmlt(igk_row, igk_col) += veff[ig12.first];
96 hmlt(igk_row, igk_col) +=
std::conj(beff[ig12.first]);
98 hmlt(igk_row, igk_col) += beff[ig12.first];
104 auto& Dop = Hk__.H0().D();
105 auto& Qop = Hk__.H0().Q();
107 sddk::mdarray<F, 2> dop(ctx.unit_cell().max_mt_basis_size(), ctx.unit_cell().max_mt_basis_size());
108 sddk::mdarray<F, 2> qop(ctx.unit_cell().max_mt_basis_size(), ctx.unit_cell().max_mt_basis_size());
110 sddk::mdarray<F, 2> btmp(kp__.num_gkvec_row(), ctx.unit_cell().max_mt_basis_size());
112 auto bp_gen_row = kp__.beta_projectors_row().make_generator();
113 auto bp_coeffs_row = bp_gen_row.prepare();
115 auto bp_gen_col = kp__.beta_projectors_col().make_generator();
116 auto bp_coeffs_col = bp_gen_col.prepare();
118 for (
int ichunk = 0; ichunk < kp__.beta_projectors_row().num_chunks(); ichunk++) {
121 bp_gen_row.generate(bp_coeffs_row, ichunk);
122 bp_gen_col.generate(bp_coeffs_col, ichunk);
124 auto& beta_row = bp_coeffs_row.pw_coeffs_a_;
125 auto& beta_col = bp_coeffs_col.pw_coeffs_a_;
127 for (
int i = 0; i < bp_coeffs_row.beta_chunk_.num_atoms_; i++) {
133 for (
int xi1 = 0; xi1 < nbf; xi1++) {
134 for (
int xi2 = 0; xi2 < nbf; xi2++) {
135 dop(xi1, xi2) = Dop.template value<F>(xi1, xi2, ispn__, ia);
136 qop(xi1, xi2) = Qop.template value<F>(xi1, xi2, ispn__, ia);
141 .gemm(
'N',
'N', kp__.num_gkvec_row(), nbf, nbf, &la::constant<F>::one(), &beta_row(0, offs),
142 beta_row.ld(), &dop(0, 0), dop.ld(), &la::constant<F>::zero(), &btmp(0, 0), btmp.ld());
145 .gemm(
'N',
'C', kp__.num_gkvec_row(), kp__.num_gkvec_col(), nbf, &la::constant<F>::one(), &btmp(0, 0),
146 btmp.ld(), &beta_col(0, offs), beta_col.ld(), &la::constant<F>::one(), &hmlt(0, 0), hmlt.ld());
148 if (ctx.unit_cell().atom(ia).type().augment()) {
150 .gemm(
'N',
'N', kp__.num_gkvec_row(), nbf, nbf, &la::constant<F>::one(), &beta_row(0, offs),
151 beta_row.ld(), &qop(0, 0), qop.ld(), &la::constant<F>::zero(), &btmp(0, 0), btmp.ld());
153 .gemm(
'N',
'C', kp__.num_gkvec_row(), kp__.num_gkvec_col(), nbf, &la::constant<F>::one(),
154 &btmp(0, 0), btmp.ld(), &beta_col(0, offs), beta_col.ld(), &la::constant<F>::one(),
155 &ovlp(0, 0), ovlp.ld());
162 if (ctx.cfg().control().verification() >= 1) {
163 double max_diff = check_hermitian(ovlp, kp__.num_gkvec());
164 if (max_diff > 1e-12) {
166 s <<
"overlap matrix is not hermitian, max_err = " << max_diff;
169 max_diff = check_hermitian(hmlt, kp__.num_gkvec());
170 if (max_diff > 1e-12) {
172 s <<
"Hamiltonian matrix is not hermitian, max_err = " << max_diff;
176 if (ctx.cfg().control().verification() >= 2) {
177 RTE_OUT(ctx.out()) <<
"checking eigen-values of S-matrix\n";
179 la::dmatrix<F> ovlp1(kp__.num_gkvec(), kp__.num_gkvec(), ctx.blacs_grid(), bs, bs);
180 la::dmatrix<F> evec(kp__.num_gkvec(), kp__.num_gkvec(), ctx.blacs_grid(), bs, bs);
184 std::vector<real_type<F>> eo(kp__.num_gkvec());
186 auto solver = la::Eigensolver_factory(
"scalapack");
187 solver->solve(kp__.num_gkvec(), ovlp1, eo.data(), evec);
189 for (
int i = 0; i < kp__.num_gkvec(); i++) {
191 RTE_OUT(ctx.out()) <<
"small eigen-value: " << eo[i] << std::endl;
196 if (gen_solver.solve(kp__.num_gkvec(), ctx.num_bands(), hmlt, ovlp, eval.data(), evec)) {
198 s <<
"error in full diagonalization";
202 for (
int j = 0; j < ctx.num_bands(); j++) {
203 kp__.band_energy(j, ispn__, eval[j]);
206 auto layout_in = evec.grid_layout(0, 0, kp__.num_gkvec(), ctx.num_bands());
208 kp__.spinor_wave_functions().grid_layout_pw(wf::spin_index(ispn__), wf::band_range(0, ctx.num_bands()));
210 costa::transform(layout_in, layout_out,
'N', la::constant<std::complex<T>>::one(),
211 la::constant<std::complex<T>>
::zero(), kp__.gkvec().comm().native());
216template <
typename T,
typename F>
217inline sddk::mdarray<real_type<F>, 1>
220 PROFILE(
"sirius::diag_S_davidson");
222 RTE_THROW(
"implement this");
224 auto& ctx = Hk__.H0().ctx();
226 auto& itso = ctx.cfg().iterative_solver();
242 for (
int i = 0; i < nevec; i++) {
244 for (
int ispn = 0; ispn < num_sc; ispn++) {
246 for (
int igk_loc = 0; igk_loc < kp__.
num_gkvec_loc(); igk_loc++) {
248 int igk = kp__.gkvec().offset() + igk_loc;
250 psi->pw_coeffs(igk_loc, s, ib) = 1.0;
253 psi->pw_coeffs(igk_loc, s, ib) = 0.5;
256 psi->pw_coeffs(igk_loc, s, ib) = 0.25;
259 psi->pw_coeffs(igk_loc, s, ib) = 0.125;
264 std::vector<double> tmp(4096);
265 for (
int i = 0; i < 4096; i++) {
266 tmp[i] = random<double>();
268 #pragma omp parallel for schedule(static)
269 for (
int i = 0; i < nevec; i++) {
270 for (
int ispn = 0; ispn < num_sc; ispn++) {
271 for (
int igk_loc = kp__.gkvec().skip_g0(); igk_loc < kp__.
num_gkvec_loc(); igk_loc++) {
273 int igk = kp__.gkvec().offset() + igk_loc;
279 auto result = davidson<T, F, davidson_evp_t::overlap>(
281 itso.residual_tolerance(), itso.num_steps(), itso.locking(), 10, itso.converge_by_energy(), itso.extra_ortho(),
285 for (
int i = 0; i < nevec; i++) {
286 eval(i) = result.eval(i, 0);
292template <
typename T,
typename F>
294diagonalize_pp(Hamiltonian_k<T>
const& Hk__, K_point<T>& kp__,
double itsol_tol__,
double empy_tol__)
296 auto& ctx = Hk__.H0().ctx();
297 print_memory_usage(ctx.out(), FILE_LINE);
299 davidson_result_t result{0, sddk::mdarray<double, 2>(),
true, {0, 0}};
301 auto& itso = ctx.cfg().iterative_solver();
302 if (itso.type() ==
"davidson") {
303 auto tolerance = [&](
int j__,
int ispn__) ->
double {
305 double tol = itsol_tol__;
308 if (std::abs(kp__.band_occupancy(j__, ispn__)) < ctx.min_occupancy() * ctx.max_occupancy()) {
316 std::ostream* out = (kp__.comm().rank() == 0) ? &std::cout : &s;
318 result = davidson<T, F, davidson_evp_t::hamiltonian>(
319 Hk__, kp__, wf::num_bands(ctx.num_bands()), wf::num_mag_dims(ctx.num_mag_dims()),
320 kp__.spinor_wave_functions(), tolerance, itso.residual_tolerance(), itso.num_steps(), itso.locking(),
321 itso.subspace_size(), itso.converge_by_energy(), itso.extra_ortho(), *out, 0);
322 for (
int ispn = 0; ispn < ctx.num_spinors(); ispn++) {
323 for (
int j = 0; j < ctx.num_bands(); j++) {
324 kp__.band_energy(j, ispn, result.eval(j, ispn));
328 RTE_THROW(
"unknown iterative solver type");
332 if (ctx.cfg().control().verification() >= 2) {
333 if (ctx.num_mag_dims() == 3) {
334 auto eval = kp__.band_energies(0);
335 check_wave_functions<T, F>(Hk__, kp__.spinor_wave_functions(), wf::spin_range(0, 2),
336 wf::band_range(0, ctx.num_bands()), eval.data());
338 for (
int ispn = 0; ispn < ctx.num_spins(); ispn++) {
339 auto eval = kp__.band_energies(ispn);
340 check_wave_functions<T, F>(Hk__, kp__.spinor_wave_functions(), wf::spin_range(ispn),
341 wf::band_range(0, ctx.num_bands()), eval.data());
346 print_memory_usage(ctx.out(), FILE_LINE);
Check orthogonality of wave-functions and their residuals.
Representation of Kohn-Sham Hamiltonian.
K-point related variables and methods.
int num_gkvec_loc() const
Local number of G+k vectors in case of flat distribution.
Multidimensional array with the column-major (Fortran) order.
Davidson iterative solver implementation.
Contains definition of sirius::K_point class.
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
void zero(T *ptr__, size_t n__)
Zero the device memory.
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > transform(::spla::Context &spla_ctx__, sddk::memory_t mem__, la::dmatrix< F > const &M__, int irow0__, int jcol0__, real_type< F > alpha__, Wave_functions< T > const &wf_in__, spin_index s_in__, band_range br_in__, real_type< F > beta__, Wave_functions< T > &wf_out__, spin_index s_out__, band_range br_out__)
Apply linear transformation to the wave-functions.
Namespace of the SIRIUS library.
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
sddk::mdarray< real_type< F >, 1 > diag_S_davidson(Hamiltonian_k< T > const &Hk__, K_point< T > &kp__)
Diagonalize S-operator of the ultrasoft or PAW methods.
static const int ia
Global index of atom.
static const int nbf
Number of beta-projector functions for this atom.
static const int offset
Offset of beta-projectors in this chunk.