SIRIUS 7.5.0
Electronic structure library and applications
hubbard_occupancies_derivatives.cpp
1// Copyright (c) 2013-2022 Mathieu Taillefumier, Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file hubbard_occupancies_derivatives.hpp
21 *
22 * \brief Generate derivatives of occupancy matrix.
23 *
24 * Compute the forces for the simple LDA+U method not the fully rotationally invariant one.
25 * It can not be used for LDA+U+SO either.
26 *
27 * This code is based on two papers :
28 * - PRB 84, 161102(R) (2011) : https://journals.aps.org/prb/abstract/10.1103/PhysRevB.84.161102
29 * - PRB 102, 235159 (2020) : https://journals.aps.org/prb/abstract/10.1103/PhysRevB.102.235159
30 *
31 * \note This code only applies to the collinear case.
32 */
33
35#include "hubbard.hpp"
36#include "SDDK/memory.hpp"
38#include "geometry/wavefunction_strain_deriv.hpp"
39
40namespace sirius {
41
42static void
43update_density_matrix_deriv(la::lib_t la__, sddk::memory_t mt__, int nwfh__, int nbnd__, std::complex<double>* alpha__,
44 la::dmatrix<std::complex<double>> const& phi_hub_s_psi_deriv__, la::dmatrix<std::complex<double>> const& psi_s_phi_hub__,
45 std::complex<double>* dn__, int ld__)
46{
47 la::wrap(la__).gemm('N', 'N', nwfh__, nwfh__, nbnd__, alpha__,
48 phi_hub_s_psi_deriv__.at(mt__, 0, 0), phi_hub_s_psi_deriv__.ld(),
49 psi_s_phi_hub__.at(mt__, 0, 0), psi_s_phi_hub__.ld(),
50 &la::constant<std::complex<double>>::one(), dn__, ld__);
51
52 la::wrap(la__).gemm('C', 'C', nwfh__, nwfh__, nbnd__, alpha__,
53 psi_s_phi_hub__.at(mt__, 0, 0), psi_s_phi_hub__.ld(),
54 phi_hub_s_psi_deriv__.at(mt__, 0, 0), phi_hub_s_psi_deriv__.ld(),
55 &la::constant<std::complex<double>>::one(), dn__, ld__);
56}
57
58static void
59build_phi_hub_s_psi_deriv(Simulation_context const& ctx__, int nbnd__, int nawf__,
60 la::dmatrix<std::complex<double>> const& ovlp__, la::dmatrix<std::complex<double>> const& inv_sqrt_O__,
61 la::dmatrix<std::complex<double>> const& phi_atomic_s_psi__,
62 la::dmatrix<std::complex<double>> const& phi_atomic_ds_psi__,
63 std::vector<int> const& atomic_wf_offset__, std::vector<int> const& hubbard_wf_offset__,
64 la::dmatrix<std::complex<double>>& phi_hub_s_psi_deriv__)
65{
66 phi_hub_s_psi_deriv__.zero();
67
68 for (int ia = 0; ia < ctx__.unit_cell().num_atoms(); ia++) {
69 auto& type = ctx__.unit_cell().atom(ia).type();
70
71 if (type.hubbard_correction()) {
72 /* loop over Hubbard orbitals of the atom */
73 for (auto e : type.indexr_hub()) {
74 auto& hd = type.lo_descriptor_hub(e.idxrf);
75 int l = e.am.l();
76 int mmax = 2 * l + 1;
77
78 int idxr_wf = hd.idx_wf();
79 int offset_in_wf = atomic_wf_offset__[ia] + type.indexb_wfs().index_of(rf_index(idxr_wf));
80 int offset_in_hwf = hubbard_wf_offset__[ia] + type.indexb_hub().index_of(e.idxrf);
81
82 if (ctx__.cfg().hubbard().full_orthogonalization()) {
83 /* compute \sum_{m} d/d r_{alpha} O^{-1/2}_{m,i} <phi_atomic_{m} | S | psi_{jk} > */
84 la::wrap(la::lib_t::blas).gemm('C', 'N', mmax, nbnd__, nawf__,
85 &la::constant<std::complex<double>>::one(),
86 ovlp__.at(sddk::memory_t::host, 0, offset_in_wf), ovlp__.ld(),
87 phi_atomic_s_psi__.at(sddk::memory_t::host), phi_atomic_s_psi__.ld(),
88 &la::constant<std::complex<double>>::one(),
89 phi_hub_s_psi_deriv__.at(sddk::memory_t::host, offset_in_hwf, 0), phi_hub_s_psi_deriv__.ld());
90
91 la::wrap(la::lib_t::blas).gemm('C', 'N', mmax, nbnd__, nawf__,
92 &la::constant<std::complex<double>>::one(),
93 inv_sqrt_O__.at(sddk::memory_t::host, 0, offset_in_wf), inv_sqrt_O__.ld(),
94 phi_atomic_ds_psi__.at(sddk::memory_t::host), phi_atomic_ds_psi__.ld(),
95 &la::constant<std::complex<double>>::one(),
96 phi_hub_s_psi_deriv__.at(sddk::memory_t::host, offset_in_hwf, 0), phi_hub_s_psi_deriv__.ld());
97 } else {
98 /* just copy part of the matrix elements in the order in which
99 * Hubbard wfs are defined */
100 for (int ibnd = 0; ibnd < nbnd__; ibnd++) {
101 for (int m = 0; m < mmax; m++) {
102 phi_hub_s_psi_deriv__(offset_in_hwf + m, ibnd) = phi_atomic_ds_psi__(offset_in_wf + m, ibnd);
103 }
104 }
105 }
106 } // idxrf
107 }
108 } // ia
109}
110
111static void
112compute_inv_sqrt_O_deriv(la::dmatrix<std::complex<double>>& O_deriv__, la::dmatrix<std::complex<double>>& evec_O__,
113 std::vector<double>& eval_O__, int nawf__)
114{
115 /* compute \tilde O' = U^{H}O'U */
116 unitary_similarity_transform(1, O_deriv__, evec_O__, nawf__);
117
118 for (int i = 0; i < nawf__; i++) {
119 for (int j = 0; j < nawf__; j++) {
120 O_deriv__(j, i) /= -(eval_O__[i] * std::sqrt(eval_O__[j]) + eval_O__[j] * std::sqrt(eval_O__[i]));
121 }
122 }
123 /* compute d/dr O^{-1/2} */
124 unitary_similarity_transform(0, O_deriv__, evec_O__, nawf__);
125}
126
127void
129 sddk::mdarray<std::complex<double>, 5>& dn__)
130{
131 PROFILE("sirius::Hubbard::compute_occupancies_derivatives");
132
133 auto la = la::lib_t::none;
134 auto mt = sddk::memory_t::none;
135 switch (ctx_.processing_unit()) {
136 case sddk::device_t::CPU: {
137 la = la::lib_t::blas;
138 mt = sddk::memory_t::host;
139 break;
140 }
141 case sddk::device_t::GPU: {
143 mt = sddk::memory_t::device;
144 break;
145 }
146 }
147 auto alpha = std::complex<double>(kp__.weight(), 0.0);
148
149 // TODO: check if we have a norm conserving pseudo potential;
150 // TODO: distribute (MPI) all matrices in the basis of atomic orbitals
151 // only derivatives of the atomic wave functions are needed.
152 auto& phi_atomic = kp__.atomic_wave_functions();
153 auto& phi_atomic_S = kp__.atomic_wave_functions_S();
154
155 auto num_ps_atomic_wf = ctx_.unit_cell().num_ps_atomic_wf();
156 auto num_hubbard_wf = ctx_.unit_cell().num_hubbard_wf();
157
158 /* number of atomic wave-functions */
159 int nawf = num_ps_atomic_wf.first;
160 /* number of Hubbard wave-functions */
161 int nhwf = num_hubbard_wf.first;
162
163 RTE_ASSERT(nawf == phi_atomic.num_wf().get());
164 RTE_ASSERT(nawf == phi_atomic_S.num_wf().get());
165
166 if (ctx_.processing_unit() == sddk::device_t::GPU) {
167 dn__.allocate(sddk::memory_t::device);
168 }
169
170 /* compute overlap matrix */
172 std::unique_ptr<la::dmatrix<std::complex<double>>> inv_sqrt_O;
173 std::unique_ptr<la::dmatrix<std::complex<double>>> evec_O;
174 std::vector<double> eval_O;
175 if (ctx_.cfg().hubbard().full_orthogonalization()) {
176 ovlp = la::dmatrix<std::complex<double>>(nawf, nawf);
177 wf::inner(ctx_.spla_context(), mt, wf::spin_range(0), phi_atomic, wf::band_range(0, nawf),
178 phi_atomic_S, wf::band_range(0, nawf), ovlp, 0, 0);
179
180 /* a tuple of O^{-1/2}, U, \lambda */
181 auto result = inverse_sqrt(ovlp, nawf);
182 inv_sqrt_O = std::move(std::get<0>(result));
183 evec_O = std::move(std::get<1>(result));
184 eval_O = std::get<2>(result);
185 }
186
187 /* compute < psi_{ik} | S | phi_hub > */
188 /* this is used in the final expression for the occupation matrix derivative */
189 std::array<la::dmatrix<std::complex<double>>, 2> psi_s_phi_hub;
190 for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
191 psi_s_phi_hub[ispn] = la::dmatrix<std::complex<double>>(kp__.num_occupied_bands(ispn), nhwf);
192 wf::inner(ctx_.spla_context(), mt, wf::spin_range(ispn), kp__.spinor_wave_functions(),
194 wf::band_range(0, nhwf), psi_s_phi_hub[ispn], 0, 0);
195 }
196
197 /* temporary storage */
198 auto phi_atomic_tmp = wave_function_factory(ctx_, kp__, phi_atomic.num_wf(), wf::num_mag_dims(0), false);
199 auto s_phi_atomic_tmp = wave_function_factory(ctx_, kp__, phi_atomic_S.num_wf(), wf::num_mag_dims(0), false);
200 auto mg1 = phi_atomic_tmp->memory_guard(mt);
201 auto mg2 = s_phi_atomic_tmp->memory_guard(mt);
202
203 /* compute < d phi_atomic / d r_{j} | S | psi_{ik} > and < d phi_atomic / d r_{j} | S | phi_atomic > */
204 std::array<std::array<la::dmatrix<std::complex<double>>, 2>, 3> grad_phi_atomic_s_psi;
205 std::array<la::dmatrix<std::complex<double>>, 3> grad_phi_atomic_s_phi_atomic;
206
207 auto& bp = kp__.beta_projectors();
208 auto bp_gen = bp.make_generator(mt);
209 auto bp_coeffs = bp_gen.prepare();
210
211 for (int x = 0; x < 3; x++) {
212 /* compute |phi_atomic_tmp> = |d phi_atomic / d r_{alpha} > for all atoms */
213 for (int i = 0; i < nawf; i++) {
214 for (int igloc = 0; igloc < kp__.num_gkvec_loc(); igloc++) {
215 /* G+k vector in Cartesian coordinates */
216 auto gk = kp__.gkvec().template gkvec_cart<index_domain_t::local>(igloc);
217 /* gradient of phi_atomic */
218 phi_atomic_tmp->pw_coeffs(igloc, wf::spin_index(0), wf::band_index(i)) = std::complex<double>(0.0, -gk[x]) *
219 phi_atomic.pw_coeffs(igloc, wf::spin_index(0), wf::band_index(i));
220 }
221 }
222 if (is_device_memory(mt)) {
223 phi_atomic_tmp->copy_to(mt);
224 }
225 /* apply S to |d phi_atomic / d r_{alpha} > */
226 apply_S_operator<double, std::complex<double>>(mt, wf::spin_range(0), wf::band_range(0, nawf), bp_gen,
227 bp_coeffs, *phi_atomic_tmp, &q_op__, *s_phi_atomic_tmp);
228
229 /* compute < d phi_atomic / d r_{alpha} | S | phi_atomic >
230 * used to compute derivative of the inverse square root of the overlap matrix */
231 if (ctx_.cfg().hubbard().full_orthogonalization()) {
232 grad_phi_atomic_s_phi_atomic[x] = la::dmatrix<std::complex<double>>(nawf, nawf);
233 wf::inner(ctx_.spla_context(), mt, wf::spin_range(0), *s_phi_atomic_tmp,
234 wf::band_range(0, nawf), phi_atomic, wf::band_range(0, nawf), grad_phi_atomic_s_phi_atomic[x], 0, 0);
235 }
236
237 for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
238 /* allocate space */
239 grad_phi_atomic_s_psi[x][ispn] = la::dmatrix<std::complex<double>>(nawf, kp__.num_occupied_bands(ispn));
240 /* compute < d phi_atomic / d r_{j} | S | psi_{ik} > for all atoms */
241 wf::inner(ctx_.spla_context(), mt, wf::spin_range(ispn), *s_phi_atomic_tmp,
242 wf::band_range(0, nawf), kp__.spinor_wave_functions(),
243 wf::band_range(0, kp__.num_occupied_bands(ispn)), grad_phi_atomic_s_psi[x][ispn], 0, 0);
244 }
245 }
246
247 /* compute <phi_atomic | S | psi_{ik} > */
248 std::array<la::dmatrix<std::complex<double>>, 2> phi_atomic_s_psi;
249 if (ctx_.cfg().hubbard().full_orthogonalization()) {
250 for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
251 phi_atomic_s_psi[ispn] = la::dmatrix<std::complex<double>>(nawf, kp__.num_occupied_bands(ispn));
252 /* compute < phi_atomic | S | psi_{ik} > for all atoms */
253 wf::inner(ctx_.spla_context(), mt, wf::spin_range(ispn), phi_atomic_S,
254 wf::band_range(0, nawf), kp__.spinor_wave_functions(),
255 wf::band_range(0, kp__.num_occupied_bands(ispn)), phi_atomic_s_psi[ispn], 0, 0);
256 }
257 }
258
259 Beta_projectors_gradient<double> bp_grad(ctx_, kp__.gkvec(), kp__.beta_projectors());
260 auto bp_grad_gen = bp_grad.make_generator(mt);
261 auto bp_grad_coeffs = bp_grad_gen.prepare();
262
263 dn__.zero(mt);
264
265 for (int ichunk = 0; ichunk < kp__.beta_projectors().num_chunks(); ichunk++) {
266
267 // store <beta|x> on device if `mt` is device memory.
268 bool copy_back_innerb = sddk::is_device_memory(mt);
269
270 bp_gen.generate(bp_coeffs, ichunk);
271 auto& beta_chunk = bp_coeffs.beta_chunk_;
272
273 /* <beta | phi_atomic> for this chunk */
274 // auto beta_phi_atomic = kp__.beta_projectors().inner<std::complex<double>>(mt, ichunk, phi_atomic, wf::spin_index(0),
275 // wf::band_range(0, nawf));
276 auto beta_phi_atomic =
277 inner_prod_beta<std::complex<double>>(ctx_.spla_context(), mt, ctx_.host_memory_t(), copy_back_innerb, bp_coeffs,
278 phi_atomic, wf::spin_index(0), wf::band_range(0, nawf));
279
280 for (int x = 0; x < 3; x++) {
281 bp_grad_gen.generate(bp_grad_coeffs, ichunk, x);
282
283 /* <dbeta | phi> for this chunk */
284 // auto grad_beta_phi_atomic = bp_grad.inner<std::complex<double>>(mt, ichunk, phi_atomic, wf::spin_index(0),
285 // wf::band_range(0, nawf));
286 auto grad_beta_phi_atomic = inner_prod_beta<std::complex<double>>(
287 ctx_.spla_context(), mt, ctx_.host_memory_t(), copy_back_innerb, bp_grad_coeffs, phi_atomic,
288 wf::spin_index(0), wf::band_range(0, nawf));
289
290 for (int i = 0; i < beta_chunk.num_atoms_; i++) {
291 /* this is a displacement atom */
292 int ja = beta_chunk.desc_(beta_desc_idx::ia, i);
293
294 /* build |phi_atomic_tmp> = | d S / d r_{j} | phi_atomic > */
295 /* it consists of two contributions:
296 * | beta > Q < d beta / dr | phi_atomic > and
297 * | d beta / dr > Q < beta | phi_atomic > */
298 phi_atomic_tmp->zero(mt, wf::spin_index(0), wf::band_range(0, nawf));
299 if (ctx_.unit_cell().atom(ja).type().augment()) {
300 q_op__.apply(mt, ichunk, atom_index_t::local(i), 0, *phi_atomic_tmp, wf::band_range(0, nawf),
301 bp_grad_coeffs, beta_phi_atomic);
302 q_op__.apply(mt, ichunk, atom_index_t::local(i), 0, *phi_atomic_tmp, wf::band_range(0, nawf),
303 bp_coeffs, grad_beta_phi_atomic);
304 }
305
306 /* compute O' = d O / d r_{alpha} */
307 /* from O = <phi | S | phi > we get
308 * O' = <phi' | S | phi> + <phi | S' |phi> + <phi | S | phi'> */
309
310 if (ctx_.cfg().hubbard().full_orthogonalization()) {
311 /* <phi | S' | phi> */
312 wf::inner(ctx_.spla_context(), mt, wf::spin_range(0), phi_atomic,
313 wf::band_range(0, nawf), *phi_atomic_tmp, wf::band_range(0, nawf), ovlp, 0, 0);
314 /* add <phi' | S | phi> and <phi | S | phi'> */
315 /* |phi' > = d|phi> / d r_{alpha} which is non-zero for a current displacement atom only */
316 auto& type = ctx_.unit_cell().atom(ja).type();
317 for (int xi = 0; xi < type.indexb_wfs().size(); xi++) {
318 int i = num_ps_atomic_wf.second[ja] + xi;
319 for (int j = 0; j < nawf; j++) {
320 ovlp(i, j) += grad_phi_atomic_s_phi_atomic[x](i, j);
321 ovlp(j, i) += std::conj(grad_phi_atomic_s_phi_atomic[x](i, j));
322 }
323 }
324 compute_inv_sqrt_O_deriv(ovlp, *evec_O, eval_O, nawf);
325 }
326
327 for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
328 /* compute <phi_atomic | dS/dr_j | psi_{ik}> */
329 la::dmatrix<std::complex<double>> phi_atomic_ds_psi(nawf, kp__.num_occupied_bands(ispn));
330 wf::inner(ctx_.spla_context(), mt, wf::spin_range(ispn), *phi_atomic_tmp,
331 wf::band_range(0, nawf), kp__.spinor_wave_functions(),
332 wf::band_range(0, kp__.num_occupied_bands(ispn)), phi_atomic_ds_psi, 0, 0);
333
334 /* add <d phi / d r_{alpha} | S | psi_{jk}> which is diagonal (in atom index) */
335 for (int ibnd = 0; ibnd < kp__.num_occupied_bands(ispn); ibnd++) {
336 for (int xi = 0; xi < ctx_.unit_cell().atom(ja).type().indexb_wfs().size(); xi++) {
337 int i = num_ps_atomic_wf.second[ja] + xi;
338 phi_atomic_ds_psi(i, ibnd) += grad_phi_atomic_s_psi[x][ispn](i, ibnd);
339 }
340 }
341
342 /* build the full d <phi_hub | S | psi_ik> / d r_{alpha} matrix */
343 la::dmatrix<std::complex<double>> phi_hub_s_psi_deriv(num_hubbard_wf.first, kp__.num_occupied_bands(ispn));
344
345 build_phi_hub_s_psi_deriv(ctx_, kp__.num_occupied_bands(ispn), nawf, ovlp, *inv_sqrt_O,
346 phi_atomic_s_psi[ispn], phi_atomic_ds_psi, num_ps_atomic_wf.second, num_hubbard_wf.second,
347 phi_hub_s_psi_deriv);
348
349 /* multiply by eigen-energy */
350 for (int ibnd = 0; ibnd < kp__.num_occupied_bands(ispn); ibnd++) {
351 for (int j = 0; j < num_hubbard_wf.first; j++) {
352 phi_hub_s_psi_deriv(j, ibnd) *= kp__.band_occupancy(ibnd, ispn);
353 }
354 }
355
356 if (ctx_.processing_unit() == sddk::device_t::GPU) {
357 phi_hub_s_psi_deriv.allocate(get_memory_pool(sddk::memory_t::device)).copy_to(sddk::memory_t::device);
358 psi_s_phi_hub[ispn].allocate(get_memory_pool(sddk::memory_t::device)).copy_to(sddk::memory_t::device);
359 }
360
361 /* update the density matrix derivative */
362 update_density_matrix_deriv(la, mt, num_hubbard_wf.first, kp__.num_occupied_bands(ispn),
363 &alpha, phi_hub_s_psi_deriv, psi_s_phi_hub[ispn], dn__.at(mt, 0, 0, ispn, x, ja),
364 dn__.ld());
365 } // ispn
366 } // i
367 } // x
368 } // ichunk
369
370 if (ctx_.processing_unit() == sddk::device_t::GPU) {
371 dn__.copy_to(sddk::memory_t::host);
372 dn__.deallocate(sddk::memory_t::device);
373 }
374}
375
376void // TODO: rename to strain_deriv, rename previous func. to displacement_deriv
378 sddk::mdarray<std::complex<double>, 4>& dn__)
379{
380 PROFILE("sirius::Hubbard::compute_occupancies_stress_derivatives");
381
382 auto la = la::lib_t::none;
383 auto mt = sddk::memory_t::none;
384 switch (ctx_.processing_unit()) {
385 case sddk::device_t::CPU: {
386 la = la::lib_t::blas;
387 mt = sddk::memory_t::host;
388 break;
389 }
390 case sddk::device_t::GPU: {
392 mt = sddk::memory_t::device;
393 break;
394 }
395 }
396 auto alpha = std::complex<double>(kp__.weight(), 0.0);
397
398 Beta_projectors_strain_deriv<double> bp_strain_deriv(ctx_, kp__.gkvec());
399 auto bp_strain_gen = bp_strain_deriv.make_generator();
400 /* initialize the beta projectors and derivatives */
401 auto bp_strain_coeffs = bp_strain_gen.prepare();
402
403 auto bp_gen = kp__.beta_projectors().make_generator();
404 auto bp_coeffs = bp_gen.prepare();
405
406 const int lmax = ctx_.unit_cell().lmax();
407 const int lmmax = sf::lmmax(lmax);
408
411
412 /* array of real spherical harmonics and derivatives for each G-vector */
413 #pragma omp parallel for schedule(static)
414 for (int igkloc = 0; igkloc < kp__.num_gkvec_loc(); igkloc++) {
415 /* gvs = {r, theta, phi} */
416 auto gvc = kp__.gkvec().gkvec_cart<index_domain_t::local>(igkloc);
417 auto rtp = r3::spherical_coordinates(gvc);
418
419 sf::spherical_harmonics(lmax, rtp[1], rtp[2], &rlm_g(0, igkloc));
420 sddk::mdarray<double, 2> rlm_dg_tmp(&rlm_dg(0, 0, igkloc), lmmax, 3);
421 sf::dRlm_dr(lmax, gvc, rlm_dg_tmp);
422 }
423
424 /* atomic wave functions */
425 auto& phi_atomic = kp__.atomic_wave_functions();
426 auto& phi_atomic_S = kp__.atomic_wave_functions_S();
427 auto& phi_hub_S = kp__.hubbard_wave_functions_S();
428
429 auto num_ps_atomic_wf = ctx_.unit_cell().num_ps_atomic_wf();
430 auto num_hubbard_wf = ctx_.unit_cell().num_hubbard_wf();
431
432 /* number of atomic wave-functions */
433 int nawf = num_ps_atomic_wf.first;
434 /* number of Hubbard wave-functions */
435 int nhwf = num_hubbard_wf.first;
436
437 RTE_ASSERT(nawf == phi_atomic.num_wf().get());
438 RTE_ASSERT(nawf == phi_atomic_S.num_wf().get());
439 RTE_ASSERT(nhwf == phi_hub_S.num_wf().get());
440
441 auto dphi_atomic = wave_function_factory(ctx_, kp__, phi_atomic.num_wf(), wf::num_mag_dims(0), false);
442 auto s_dphi_atomic = wave_function_factory(ctx_, kp__, phi_atomic.num_wf(), wf::num_mag_dims(0), false);
443 auto ds_phi_atomic = wave_function_factory(ctx_, kp__, phi_atomic.num_wf(), wf::num_mag_dims(0), false);
444
445 /* compute < psi_{ik} | S | phi_hub > */
446 /* this is used in the final expression for the occupation matrix derivative */
447 std::array<la::dmatrix<std::complex<double>>, 2> psi_s_phi_hub;
448 for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
449 psi_s_phi_hub[ispn] = la::dmatrix<std::complex<double>>(kp__.num_occupied_bands(ispn), nhwf);
450 wf::inner(ctx_.spla_context(), mt, wf::spin_range(ispn), kp__.spinor_wave_functions(),
451 wf::band_range(0, kp__.num_occupied_bands(ispn)), phi_hub_S, wf::band_range(0, nhwf),
452 psi_s_phi_hub[ispn], 0, 0);
453 }
454
455 /* compute overlap matrix */
457 std::unique_ptr<la::dmatrix<std::complex<double>>> inv_sqrt_O;
458 std::unique_ptr<la::dmatrix<std::complex<double>>> evec_O;
459 std::vector<double> eval_O;
460 if (ctx_.cfg().hubbard().full_orthogonalization()) {
461 ovlp = la::dmatrix<std::complex<double>>(nawf, nawf);
462 wf::inner(ctx_.spla_context(), mt, wf::spin_range(0), phi_atomic, wf::band_range(0, nawf),
463 phi_atomic_S, wf::band_range(0, nawf), ovlp, 0, 0);
464
465 /* a tuple of O^{-1/2}, U, \lambda */
466 auto result = inverse_sqrt(ovlp, nawf);
467 inv_sqrt_O = std::move(std::get<0>(result));
468 evec_O = std::move(std::get<1>(result));
469 eval_O = std::get<2>(result);
470 }
471
472 /* compute <phi_atomic | S | psi_{ik} > */
473 std::array<la::dmatrix<std::complex<double>>, 2> phi_atomic_s_psi;
474 if (ctx_.cfg().hubbard().full_orthogonalization()) {
475 for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
476 phi_atomic_s_psi[ispn] = la::dmatrix<std::complex<double>>(nawf, kp__.num_occupied_bands(ispn));
477 /* compute < phi_atomic | S | psi_{ik} > for all atoms */
478 wf::inner(ctx_.spla_context(), mt, wf::spin_range(ispn), phi_atomic_S,
479 wf::band_range(0, nawf), kp__.spinor_wave_functions(),
480 wf::band_range(0, kp__.num_occupied_bands(ispn)), phi_atomic_s_psi[ispn], 0, 0);
481 }
482 }
483
484 auto mg1 = dphi_atomic->memory_guard(mt);
485 auto mg2 = s_dphi_atomic->memory_guard(mt);
486 auto mg3 = ds_phi_atomic->memory_guard(mt);
487 for (int nu = 0; nu < 3; nu++) {
488 for (int mu = 0; mu < 3; mu++) {
489 /* compute |d phi_atomic / d epsilon_{mu, nu} > */
490 wavefunctions_strain_deriv(ctx_, kp__, *dphi_atomic, rlm_g, rlm_dg, nu, mu);
491
492 if (is_device_memory(mt)) {
493 dphi_atomic->copy_to(mt);
494 }
495
496 /* compute S |d phi_atomic / d epsilon_{mu, nu} > */
497 sirius::apply_S_operator<double, std::complex<double>>(mt, wf::spin_range(0), wf::band_range(0, nawf),
498 bp_gen, bp_coeffs, *dphi_atomic, &q_op__,
499 *s_dphi_atomic);
500
501 ds_phi_atomic->zero(mt, wf::spin_index(0), wf::band_range(0, nawf));
502 sirius::apply_S_operator_strain_deriv(mt, 3 * nu + mu, bp_gen, bp_coeffs, bp_strain_gen, bp_strain_coeffs,
503 phi_atomic, q_op__, *ds_phi_atomic);
504
505 if (ctx_.cfg().hubbard().full_orthogonalization()) {
506 /* compute <phi_atomic | ds/d epsilon | phi_atomic> */
507 wf::inner(ctx_.spla_context(), mt, wf::spin_range(0), phi_atomic,
508 wf::band_range(0, nawf), *ds_phi_atomic, wf::band_range(0, nawf), ovlp, 0, 0);
509
510 /* compute <d phi_atomic / d epsilon | S | phi_atomic > */
511 la::dmatrix<std::complex<double>> tmp(nawf, nawf);
512 wf::inner(ctx_.spla_context(), mt, wf::spin_range(0), *s_dphi_atomic,
513 wf::band_range(0, nawf), phi_atomic, wf::band_range(0, nawf), tmp, 0, 0);
514
515 for (int i = 0; i < nawf; i++) {
516 for (int j = 0; j < nawf; j++) {
517 ovlp(i, j) += tmp(i, j) + std::conj(tmp(j, i));
518 }
519 }
520 compute_inv_sqrt_O_deriv(ovlp, *evec_O, eval_O, nawf);
521 }
522
523 for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
524 la::dmatrix<std::complex<double>> dphi_atomic_s_psi(nawf, kp__.num_occupied_bands(ispn));
525 wf::inner(ctx_.spla_context(), mt, wf::spin_range(ispn), *s_dphi_atomic,
526 wf::band_range(0, nawf), kp__.spinor_wave_functions(),
527 wf::band_range(0, kp__.num_occupied_bands(ispn)), dphi_atomic_s_psi, 0, 0);
528
529 la::dmatrix<std::complex<double>> phi_atomic_ds_psi(nawf, kp__.num_occupied_bands(ispn));
530 wf::inner(ctx_.spla_context(), mt, wf::spin_range(ispn), *ds_phi_atomic,
531 wf::band_range(0, nawf), kp__.spinor_wave_functions(),
532 wf::band_range(0, kp__.num_occupied_bands(ispn)), phi_atomic_ds_psi, 0, 0);
533
534 for (int ibnd = 0; ibnd < kp__.num_occupied_bands(ispn); ibnd++) {
535 for (int j = 0; j < nawf; j++) {
536 phi_atomic_ds_psi(j, ibnd) += dphi_atomic_s_psi(j, ibnd);
537 }
538 }
539
540 /* build the full d <phi_hub | S | psi_ik> / d epsilon_{mu,nu} matrix */
541 la::dmatrix<std::complex<double>> phi_hub_s_psi_deriv(num_hubbard_wf.first, kp__.num_occupied_bands(ispn));
542 build_phi_hub_s_psi_deriv(ctx_, kp__.num_occupied_bands(ispn), nawf, ovlp, *inv_sqrt_O,
543 phi_atomic_s_psi[ispn], phi_atomic_ds_psi, num_ps_atomic_wf.second, num_hubbard_wf.second,
544 phi_hub_s_psi_deriv);
545
546 /* multiply by eigen-energy */
547 for (int ibnd = 0; ibnd < kp__.num_occupied_bands(ispn); ibnd++) {
548 for (int j = 0; j < num_hubbard_wf.first; j++) {
549 phi_hub_s_psi_deriv(j, ibnd) *= kp__.band_occupancy(ibnd, ispn);
550 }
551 }
552
553 if (ctx_.processing_unit() == sddk::device_t::GPU) {
554 psi_s_phi_hub[ispn].allocate(get_memory_pool(sddk::memory_t::device)).copy_to(sddk::memory_t::device);
555 phi_hub_s_psi_deriv.allocate(get_memory_pool(sddk::memory_t::device)).copy_to(sddk::memory_t::device);
556 }
557
558 /* update the density matrix derivative */
559 update_density_matrix_deriv(la, mt, num_hubbard_wf.first, kp__.num_occupied_bands(ispn),
560 &alpha, phi_hub_s_psi_deriv, psi_s_phi_hub[ispn], dn__.at(mt, 0, 0, ispn, 3 * nu + mu),
561 dn__.ld());
562 }
563 }
564 }
565
566 if (ctx_.processing_unit() == sddk::device_t::GPU) {
567 dn__.copy_to(sddk::memory_t::host);
568 }
569}
570
571} // namespace sirius
Contains declaration and implementation of sirius::Beta_projectors_base class.
Compute gradient of beta-projectors over atomic positions .
void compute_occupancies_stress_derivatives(K_point< double > &kp__, Q_operator< double > &q_op__, sddk::mdarray< std::complex< double >, 4 > &dn__)
Compute derivatives of the occupancy matrix w.r.t.atomic displacement.
void compute_occupancies_derivatives(K_point< double > &kp__, Q_operator< double > &q_op__, sddk::mdarray< std::complex< double >, 5 > &dn__)
Compute the occupancy derivatives with respect to atomic displacements.
auto const & atomic_wave_functions() const
Return the initial atomic orbitals used to compute the hubbard wave functions.
Definition: k_point.hpp:502
double band_occupancy(int j__, int ispn__) const
Get band occupancy.
Definition: k_point.hpp:434
auto const & atomic_wave_functions_S() const
Definition: k_point.hpp:489
double weight() const
Return weight of k-point.
Definition: k_point.hpp:456
auto const & hubbard_wave_functions_S() const
Return the actual hubbard wave functions used in the calculations.
Definition: k_point.hpp:516
int num_occupied_bands(int ispn__=-1) const
Get the number of occupied bands for each spin channel.
Definition: k_point.hpp:399
int num_gkvec_loc() const
Local number of G+k vectors in case of flat distribution.
Definition: k_point.hpp:393
void apply(sddk::memory_t mem__, int chunk__, int ispn_block__, wf::Wave_functions< T > &op_phi__, wf::band_range br__, beta_projectors_coeffs_t< T > const &beta_coeffs__, sddk::matrix< F > const &beta_phi__) const
Apply chunk of beta-projectors to all wave functions.
auto host_memory_t() const
Type of the host memory for arrays used in linear algebra operations.
int num_spins() const
Number of spin components.
Distributed matrix.
Definition: dmatrix.hpp:56
Multidimensional array with the column-major (Fortran) order.
Definition: memory.hpp:660
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
Definition: memory.hpp:1057
Describe a range of bands.
Describe a range of spins.
Contains declaration and partial implementation of sirius::Hubbard class.
Compute inverse square root of the matrix.
Memory management functions and classes.
bool is_device_memory(memory_t mem__)
Check if this is a valid device memory (memory, accessible by the device).
Definition: memory.hpp:93
auto inverse_sqrt(la::dmatrix< T > &A__, int N__)
Compute inverse square root of the matrix.
lib_t
Type of linear algebra backend library.
Definition: linalg_base.hpp:70
@ gpublas
GPU BLAS (cuBlas or ROCblas)
void unitary_similarity_transform(int kind__, dmatrix< T > &A__, dmatrix< T > const &U__, int n__)
Definition: linalg.hpp:2018
auto spherical_coordinates(vector< double > vc)
Transform Cartesian coordinates [x,y,z] to spherical coordinates [r,theta,phi].
Definition: r3.hpp:590
int lmax(int lmmax__)
Get maximum orbital quantum number by the maximum lm index.
Definition: specfunc.hpp:56
int lmmax(int lmax)
Maximum number of combinations for a given .
Definition: specfunc.hpp:44
void dRlm_dr(int lmax__, r3::vector< double > &r__, sddk::mdarray< double, 2 > &data__, bool divide_by_r__=true)
Compute the derivatives of real spherical harmonics over the components of cartesian vector.
Definition: specfunc.hpp:512
void spherical_harmonics(int lmax, double theta, double phi, std::complex< double > *ylm)
Optimized implementation of complex spherical harmonics.
Definition: specfunc.hpp:298
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > inner(::spla::Context &spla_ctx__, sddk::memory_t mem__, spin_range spins__, W const &wf_i__, band_range br_i__, Wave_functions< T > const &wf_j__, band_range br_j__, la::dmatrix< F > &result__, int irow0__, int jcol0__)
Compute inner product between the two sets of wave-functions.
Namespace of the SIRIUS library.
Definition: sirius.f90:5
strong_type< int, struct __rf_index_tag > rf_index
Radial function index.
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
Definition: math_tools.hpp:165
void apply_S_operator_strain_deriv(sddk::memory_t mem__, int comp__, Beta_projector_generator< double > &bp__, beta_projectors_coeffs_t< double > &bp_coeffs__, Beta_projector_generator< double > &bp_strain_deriv__, beta_projectors_coeffs_t< double > &bp_strain_deriv_coeffs__, wf::Wave_functions< double > &phi__, Q_operator< double > &q_op__, wf::Wave_functions< double > &ds_phi__)
Apply strain derivative of S-operator to all scalar functions.
static const int ia
Global index of atom.