SIRIUS 7.5.0
Electronic structure library and applications
diagonalize_pp.hpp
Go to the documentation of this file.
1// Copyright (c) 2013-2023 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file diagonalize_pp.hpp
21 *
22 * \brief Diagonalize pseudo-potential Hamiltonian.
23 */
24#ifndef __DIAGONALIZE_PP_HPP__
25#define __DIAGONALIZE_PP_HPP__
26
27#include "davidson.hpp"
29#include "k_point/k_point.hpp"
30
31namespace sirius {
32
33template <typename T, typename F>
34inline std::enable_if_t<!std::is_same<T, real_type<F>>::value, void>
35diagonalize_pp_exact(int ispn__, Hamiltonian_k<T> const& Hk__, K_point<T>& kp)
36{
37 RTE_THROW("not implemented");
38}
39
40template <typename T, typename F>
41inline std::enable_if_t<std::is_same<T, real_type<F>>::value, void>
42diagonalize_pp_exact(int ispn__, Hamiltonian_k<T> const& Hk__, K_point<T>& kp__)
43{
44 PROFILE("sirius::diagonalize_pp_exact");
45
46 auto& ctx = Hk__.H0().ctx();
47
48 if (ctx.gamma_point()) {
49 RTE_THROW("exact diagonalization for Gamma-point case is not implemented");
50 }
51
52 const int bs = ctx.cyclic_block_size();
53 la::dmatrix<F> hmlt(kp__.num_gkvec(), kp__.num_gkvec(), ctx.blacs_grid(), bs, bs);
54 la::dmatrix<F> ovlp(kp__.num_gkvec(), kp__.num_gkvec(), ctx.blacs_grid(), bs, bs);
55 la::dmatrix<F> evec(kp__.num_gkvec(), kp__.num_gkvec(), ctx.blacs_grid(), bs, bs);
56 std::vector<real_type<F>> eval(kp__.num_gkvec());
57
58 hmlt.zero();
59 ovlp.zero();
60
61 auto& gen_solver = ctx.gen_evp_solver();
62
63 for (int ig = 0; ig < kp__.num_gkvec(); ig++) {
64 hmlt.set(ig, ig,
65 0.5 * std::pow(kp__.gkvec().template gkvec_cart<index_domain_t::global>(ig).length(), 2));
66 ovlp.set(ig, ig, 1);
67 }
68
69 auto veff = Hk__.H0().potential().effective_potential().rg().gather_f_pw();
70 std::vector<std::complex<double>> beff;
71 if (ctx.num_mag_dims() == 1) {
72 beff = Hk__.H0().potential().effective_magnetic_field(0).rg().gather_f_pw();
73 for (int ig = 0; ig < ctx.gvec().num_gvec(); ig++) {
74 auto z1 = veff[ig];
75 auto z2 = beff[ig];
76 veff[ig] = z1 + z2;
77 beff[ig] = z1 - z2;
78 }
79 }
80
81 #pragma omp parallel for schedule(static)
82 for (int igk_col = 0; igk_col < kp__.num_gkvec_col(); igk_col++) {
83 auto gvec_col = kp__.gkvec_col().template gvec<index_domain_t::local>(igk_col);
84 for (int igk_row = 0; igk_row < kp__.num_gkvec_row(); igk_row++) {
85 auto gvec_row = kp__.gkvec_row().template gvec<index_domain_t::local>(igk_row);
86 auto ig12 = ctx.gvec().index_g12_safe(gvec_row, gvec_col);
87
88 if (ispn__ == 0) {
89 if (ig12.second) {
90 hmlt(igk_row, igk_col) += std::conj(veff[ig12.first]);
91 } else {
92 hmlt(igk_row, igk_col) += veff[ig12.first];
93 }
94 } else {
95 if (ig12.second) {
96 hmlt(igk_row, igk_col) += std::conj(beff[ig12.first]);
97 } else {
98 hmlt(igk_row, igk_col) += beff[ig12.first];
99 }
100 }
101 }
102 }
103
104 auto& Dop = Hk__.H0().D();
105 auto& Qop = Hk__.H0().Q();
106
107 sddk::mdarray<F, 2> dop(ctx.unit_cell().max_mt_basis_size(), ctx.unit_cell().max_mt_basis_size());
108 sddk::mdarray<F, 2> qop(ctx.unit_cell().max_mt_basis_size(), ctx.unit_cell().max_mt_basis_size());
109
110 sddk::mdarray<F, 2> btmp(kp__.num_gkvec_row(), ctx.unit_cell().max_mt_basis_size());
111
112 auto bp_gen_row = kp__.beta_projectors_row().make_generator();
113 auto bp_coeffs_row = bp_gen_row.prepare();
114
115 auto bp_gen_col = kp__.beta_projectors_col().make_generator();
116 auto bp_coeffs_col = bp_gen_col.prepare();
117
118 for (int ichunk = 0; ichunk < kp__.beta_projectors_row().num_chunks(); ichunk++) {
119 /* generate beta-projectors for a block of atoms */
120
121 bp_gen_row.generate(bp_coeffs_row, ichunk);
122 bp_gen_col.generate(bp_coeffs_col, ichunk);
123
124 auto& beta_row = bp_coeffs_row.pw_coeffs_a_;
125 auto& beta_col = bp_coeffs_col.pw_coeffs_a_;
126
127 for (int i = 0; i < bp_coeffs_row.beta_chunk_.num_atoms_; i++) {
128 /* number of beta functions for a given atom */
129 int nbf = bp_coeffs_row.beta_chunk_.desc_(beta_desc_idx::nbf, i);
130 int offs = bp_coeffs_row.beta_chunk_.desc_(beta_desc_idx::offset, i);
131 int ia = bp_coeffs_row.beta_chunk_.desc_(beta_desc_idx::ia, i);
132
133 for (int xi1 = 0; xi1 < nbf; xi1++) {
134 for (int xi2 = 0; xi2 < nbf; xi2++) {
135 dop(xi1, xi2) = Dop.template value<F>(xi1, xi2, ispn__, ia);
136 qop(xi1, xi2) = Qop.template value<F>(xi1, xi2, ispn__, ia);
137 }
138 }
139 /* compute <G+k|beta> D */
140 la::wrap(la::lib_t::blas)
141 .gemm('N', 'N', kp__.num_gkvec_row(), nbf, nbf, &la::constant<F>::one(), &beta_row(0, offs),
142 beta_row.ld(), &dop(0, 0), dop.ld(), &la::constant<F>::zero(), &btmp(0, 0), btmp.ld());
143 /* compute (<G+k|beta> D ) <beta|G+k> */
144 la::wrap(la::lib_t::blas)
145 .gemm('N', 'C', kp__.num_gkvec_row(), kp__.num_gkvec_col(), nbf, &la::constant<F>::one(), &btmp(0, 0),
146 btmp.ld(), &beta_col(0, offs), beta_col.ld(), &la::constant<F>::one(), &hmlt(0, 0), hmlt.ld());
147 /* update the overlap matrix */
148 if (ctx.unit_cell().atom(ia).type().augment()) {
149 la::wrap(la::lib_t::blas)
150 .gemm('N', 'N', kp__.num_gkvec_row(), nbf, nbf, &la::constant<F>::one(), &beta_row(0, offs),
151 beta_row.ld(), &qop(0, 0), qop.ld(), &la::constant<F>::zero(), &btmp(0, 0), btmp.ld());
152 la::wrap(la::lib_t::blas)
153 .gemm('N', 'C', kp__.num_gkvec_row(), kp__.num_gkvec_col(), nbf, &la::constant<F>::one(),
154 &btmp(0, 0), btmp.ld(), &beta_col(0, offs), beta_col.ld(), &la::constant<F>::one(),
155 &ovlp(0, 0), ovlp.ld());
156 }
157 } // i (atoms in chunk)
158 }
159 // kp.beta_projectors_row().dismiss();
160 // kp.beta_projectors_col().dismiss();
161
162 if (ctx.cfg().control().verification() >= 1) {
163 double max_diff = check_hermitian(ovlp, kp__.num_gkvec());
164 if (max_diff > 1e-12) {
165 std::stringstream s;
166 s << "overlap matrix is not hermitian, max_err = " << max_diff;
167 RTE_THROW(s);
168 }
169 max_diff = check_hermitian(hmlt, kp__.num_gkvec());
170 if (max_diff > 1e-12) {
171 std::stringstream s;
172 s << "Hamiltonian matrix is not hermitian, max_err = " << max_diff;
173 RTE_THROW(s);
174 }
175 }
176 if (ctx.cfg().control().verification() >= 2) {
177 RTE_OUT(ctx.out()) << "checking eigen-values of S-matrix\n";
178
179 la::dmatrix<F> ovlp1(kp__.num_gkvec(), kp__.num_gkvec(), ctx.blacs_grid(), bs, bs);
180 la::dmatrix<F> evec(kp__.num_gkvec(), kp__.num_gkvec(), ctx.blacs_grid(), bs, bs);
181
182 sddk::copy(ovlp, ovlp1);
183
184 std::vector<real_type<F>> eo(kp__.num_gkvec());
185
186 auto solver = la::Eigensolver_factory("scalapack");
187 solver->solve(kp__.num_gkvec(), ovlp1, eo.data(), evec);
188
189 for (int i = 0; i < kp__.num_gkvec(); i++) {
190 if (eo[i] < 1e-6) {
191 RTE_OUT(ctx.out()) << "small eigen-value: " << eo[i] << std::endl;
192 }
193 }
194 }
195
196 if (gen_solver.solve(kp__.num_gkvec(), ctx.num_bands(), hmlt, ovlp, eval.data(), evec)) {
197 std::stringstream s;
198 s << "error in full diagonalization";
199 RTE_THROW(s);
200 }
201
202 for (int j = 0; j < ctx.num_bands(); j++) {
203 kp__.band_energy(j, ispn__, eval[j]);
204 }
205
206 auto layout_in = evec.grid_layout(0, 0, kp__.num_gkvec(), ctx.num_bands());
207 auto layout_out =
208 kp__.spinor_wave_functions().grid_layout_pw(wf::spin_index(ispn__), wf::band_range(0, ctx.num_bands()));
209
210 costa::transform(layout_in, layout_out, 'N', la::constant<std::complex<T>>::one(),
211 la::constant<std::complex<T>>::zero(), kp__.gkvec().comm().native());
212}
213
214/// Diagonalize S-operator of the ultrasoft or PAW methods.
215/** Sometimes this is needed to check the quality of pseudopotential. */
216template <typename T, typename F>
217inline sddk::mdarray<real_type<F>, 1>
219{
220 PROFILE("sirius::diag_S_davidson");
221
222 RTE_THROW("implement this");
223
224 auto& ctx = Hk__.H0().ctx();
225
226 auto& itso = ctx.cfg().iterative_solver();
227
228 /* for overlap matrix we do non-magnetic or non-collinear diagonalization */
229 auto num_mag_dims = wf::num_mag_dims((ctx.num_mag_dims() == 3) ? 3 : 0);
230
231 /* number of spin components, treated simultaneously
232 * 1 - in case of non-magnetic
233 * 2 - in case of non-collinear calculation
234 */
235 const int num_sc = (num_mag_dims == 3) ? 2 : 1;
236
237 /* number of eigen-vectors to find */
238 const int nevec{1};
239
240 /* eigen-vectors */
241 auto psi = wave_function_factory(ctx, kp__, wf::num_bands(nevec), num_mag_dims, false);
242 for (int i = 0; i < nevec; i++) {
243 auto ib = wf::band_index(i);
244 for (int ispn = 0; ispn < num_sc; ispn++) {
245 auto s = wf::spin_index(ispn);
246 for (int igk_loc = 0; igk_loc < kp__.num_gkvec_loc(); igk_loc++) {
247 /* global index of G+k vector */
248 int igk = kp__.gkvec().offset() + igk_loc;
249 if (igk == i + 1) {
250 psi->pw_coeffs(igk_loc, s, ib) = 1.0;
251 }
252 if (igk == i + 2) {
253 psi->pw_coeffs(igk_loc, s, ib) = 0.5;
254 }
255 if (igk == i + 3) {
256 psi->pw_coeffs(igk_loc, s, ib) = 0.25;
257 }
258 if (igk == i + 4) {
259 psi->pw_coeffs(igk_loc, s, ib) = 0.125;
260 }
261 }
262 }
263 }
264 std::vector<double> tmp(4096);
265 for (int i = 0; i < 4096; i++) {
266 tmp[i] = random<double>();
267 }
268 #pragma omp parallel for schedule(static)
269 for (int i = 0; i < nevec; i++) {
270 for (int ispn = 0; ispn < num_sc; ispn++) {
271 for (int igk_loc = kp__.gkvec().skip_g0(); igk_loc < kp__.num_gkvec_loc(); igk_loc++) {
272 /* global index of G+k vector */
273 int igk = kp__.gkvec().offset() + igk_loc;
274 psi->pw_coeffs(igk_loc, wf::spin_index(ispn), wf::band_index(i)) += tmp[igk & 0xFFF] * 1e-5;
275 }
276 }
277 }
278
279 auto result = davidson<T, F, davidson_evp_t::overlap>(
280 Hk__, kp__, wf::num_bands(nevec), num_mag_dims, *psi, [](int i, int ispn) { return 1e-10; },
281 itso.residual_tolerance(), itso.num_steps(), itso.locking(), 10, itso.converge_by_energy(), itso.extra_ortho(),
282 std::cout, 0);
283
284 sddk::mdarray<real_type<F>, 1> eval(nevec);
285 for (int i = 0; i < nevec; i++) {
286 eval(i) = result.eval(i, 0);
287 }
288
289 return eval;
290}
291
292template <typename T, typename F>
293inline auto
294diagonalize_pp(Hamiltonian_k<T> const& Hk__, K_point<T>& kp__, double itsol_tol__, double empy_tol__)
295{
296 auto& ctx = Hk__.H0().ctx();
297 print_memory_usage(ctx.out(), FILE_LINE);
298
299 davidson_result_t result{0, sddk::mdarray<double, 2>(), true, {0, 0}};
300
301 auto& itso = ctx.cfg().iterative_solver();
302 if (itso.type() == "davidson") {
303 auto tolerance = [&](int j__, int ispn__) -> double {
304 /* tolerance for occupied states */
305 double tol = itsol_tol__;
306 /* if band is empty, make tolerance larger (in most cases we don't need high precision on
307 * unoccupied states) */
308 if (std::abs(kp__.band_occupancy(j__, ispn__)) < ctx.min_occupancy() * ctx.max_occupancy()) {
309 tol += empy_tol__;
310 }
311
312 return tol;
313 };
314
315 std::stringstream s;
316 std::ostream* out = (kp__.comm().rank() == 0) ? &std::cout : &s;
317
318 result = davidson<T, F, davidson_evp_t::hamiltonian>(
319 Hk__, kp__, wf::num_bands(ctx.num_bands()), wf::num_mag_dims(ctx.num_mag_dims()),
320 kp__.spinor_wave_functions(), tolerance, itso.residual_tolerance(), itso.num_steps(), itso.locking(),
321 itso.subspace_size(), itso.converge_by_energy(), itso.extra_ortho(), *out, 0);
322 for (int ispn = 0; ispn < ctx.num_spinors(); ispn++) {
323 for (int j = 0; j < ctx.num_bands(); j++) {
324 kp__.band_energy(j, ispn, result.eval(j, ispn));
325 }
326 }
327 } else {
328 RTE_THROW("unknown iterative solver type");
329 }
330
331 /* check wave-functions */
332 if (ctx.cfg().control().verification() >= 2) {
333 if (ctx.num_mag_dims() == 3) {
334 auto eval = kp__.band_energies(0);
335 check_wave_functions<T, F>(Hk__, kp__.spinor_wave_functions(), wf::spin_range(0, 2),
336 wf::band_range(0, ctx.num_bands()), eval.data());
337 } else {
338 for (int ispn = 0; ispn < ctx.num_spins(); ispn++) {
339 auto eval = kp__.band_energies(ispn);
340 check_wave_functions<T, F>(Hk__, kp__.spinor_wave_functions(), wf::spin_range(ispn),
341 wf::band_range(0, ctx.num_bands()), eval.data());
342 }
343 }
344 }
345
346 print_memory_usage(ctx.out(), FILE_LINE);
347
348 return result;
349}
350
351} // namespace sirius
352
353#endif
Check orthogonality of wave-functions and their residuals.
Representation of Kohn-Sham Hamiltonian.
K-point related variables and methods.
Definition: k_point.hpp:44
int num_gkvec_loc() const
Local number of G+k vectors in case of flat distribution.
Definition: k_point.hpp:393
Multidimensional array with the column-major (Fortran) order.
Definition: memory.hpp:660
Davidson iterative solver implementation.
Contains definition of sirius::K_point class.
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
Definition: acc.hpp:320
void zero(T *ptr__, size_t n__)
Zero the device memory.
Definition: acc.hpp:397
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > transform(::spla::Context &spla_ctx__, sddk::memory_t mem__, la::dmatrix< F > const &M__, int irow0__, int jcol0__, real_type< F > alpha__, Wave_functions< T > const &wf_in__, spin_index s_in__, band_range br_in__, real_type< F > beta__, Wave_functions< T > &wf_out__, spin_index s_out__, band_range br_out__)
Apply linear transformation to the wave-functions.
Namespace of the SIRIUS library.
Definition: sirius.f90:5
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
Definition: math_tools.hpp:165
sddk::mdarray< real_type< F >, 1 > diag_S_davidson(Hamiltonian_k< T > const &Hk__, K_point< T > &kp__)
Diagonalize S-operator of the ultrasoft or PAW methods.
static const int ia
Global index of atom.
static const int nbf
Number of beta-projector functions for this atom.
static const int offset
Offset of beta-projectors in this chunk.