#include "geometry/wavefunction_strain_deriv.hpp"

/* accumulate the derivative of the Hubbard occupation matrix from two inner-product matrices */
void
update_density_matrix_deriv(la::lib_t la__, sddk::memory_t mt__, int nwfh__, int nbnd__,
                            std::complex<double>* alpha__,
                            la::dmatrix<std::complex<double>> const& phi_hub_s_psi_deriv__,
                            la::dmatrix<std::complex<double>> const& psi_s_phi_hub__,
                            std::complex<double>* dn__, int ld__)
{
    /* dn += alpha * (d<phi_hub|S|psi>) * <psi|S|phi_hub> */
    la::wrap(la__).gemm('N', 'N', nwfh__, nwfh__, nbnd__, alpha__,
                        phi_hub_s_psi_deriv__.at(mt__, 0, 0), phi_hub_s_psi_deriv__.ld(),
                        psi_s_phi_hub__.at(mt__, 0, 0), psi_s_phi_hub__.ld(),
                        &la::constant<std::complex<double>>::one(), dn__, ld__);

    /* dn += alpha * <psi|S|phi_hub>^H * (d<phi_hub|S|psi>)^H, i.e. the Hermitian-conjugate term */
    la::wrap(la__).gemm('C', 'C', nwfh__, nwfh__, nbnd__, alpha__,
                        psi_s_phi_hub__.at(mt__, 0, 0), psi_s_phi_hub__.ld(),
                        phi_hub_s_psi_deriv__.at(mt__, 0, 0), phi_hub_s_psi_deriv__.ld(),
                        &la::constant<std::complex<double>>::one(), dn__, ld__);
}
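Written out, the two GEMMs above accumulate the following contribution to the occupation-matrix derivative for one spin channel and k-point (this is just a restatement of the code, with alpha carrying the k-point weight and mu the perturbation parameter, i.e. an atomic displacement or a strain component):

\[
\Delta n_{m m'} \;\mathrel{+}=\; \alpha\, M_{m m'} \;+\; \alpha\,\bigl(M^{\dagger}\bigr)_{m m'},
\qquad
M_{m m'} \;=\; \sum_{n}^{\mathrm{occ}}
\frac{\partial \langle \phi^{\mathrm{hub}}_{m} | \hat S | \psi_{n}\rangle}{\partial \mu}\,
\langle \psi_{n} | \hat S | \phi^{\mathrm{hub}}_{m'}\rangle .
\]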
void
build_phi_hub_s_psi_deriv(Simulation_context const& ctx__, int nbnd__, int nawf__,
                          la::dmatrix<std::complex<double>> const& ovlp__,
                          la::dmatrix<std::complex<double>> const& inv_sqrt_O__,
                          la::dmatrix<std::complex<double>> const& phi_atomic_s_psi__,
                          la::dmatrix<std::complex<double>> const& phi_atomic_ds_psi__,
                          std::vector<int> const& atomic_wf_offset__, std::vector<int> const& hubbard_wf_offset__,
                          la::dmatrix<std::complex<double>>& phi_hub_s_psi_deriv__)
{
    phi_hub_s_psi_deriv__.zero();

    for (int ia = 0; ia < ctx__.unit_cell().num_atoms(); ia++) {
        auto& type = ctx__.unit_cell().atom(ia).type();
        if (type.hubbard_correction()) {
            /* loop over the Hubbard orbitals of this atom */
            for (auto e : type.indexr_hub()) {
                auto& hd = type.lo_descriptor_hub(e.idxrf);
                int idxr_wf = hd.idx_wf();
                /* offsets of this orbital block in the atomic and Hubbard wave-function sets */
                int offset_in_wf  = atomic_wf_offset__[ia] + type.indexb_wfs().index_of(rf_index(idxr_wf));
                int offset_in_hwf = hubbard_wf_offset__[ia] + type.indexb_hub().index_of(e.idxrf);
                /* mmax, the number of m components of this orbital, is defined in code elided from this listing */

                if (ctx__.cfg().hubbard().full_orthogonalization()) {
                    /* first product-rule term, (d O^{-1/2}) applied to <phi_atomic|S|psi>;
                       the head of the GEMM call is elided in this listing: */
                            &la::constant<std::complex<double>>::one(),
                            ovlp__.at(sddk::memory_t::host, 0, offset_in_wf), ovlp__.ld(),
                            phi_atomic_s_psi__.at(sddk::memory_t::host), phi_atomic_s_psi__.ld(),
                            &la::constant<std::complex<double>>::one(),
                            phi_hub_s_psi_deriv__.at(sddk::memory_t::host, offset_in_hwf, 0), phi_hub_s_psi_deriv__.ld());
                    /* second product-rule term, O^{-1/2} applied to d<phi_atomic|S|psi>;
                       the head of the GEMM call is elided in this listing: */
                            &la::constant<std::complex<double>>::one(),
                            inv_sqrt_O__.at(sddk::memory_t::host, 0, offset_in_wf), inv_sqrt_O__.ld(),
                            phi_atomic_ds_psi__.at(sddk::memory_t::host), phi_atomic_ds_psi__.ld(),
                            &la::constant<std::complex<double>>::one(),
                            phi_hub_s_psi_deriv__.at(sddk::memory_t::host, offset_in_hwf, 0), phi_hub_s_psi_deriv__.ld());
                } else {
                    /* without full orthogonalization the Hubbard orbitals coincide with the atomic ones,
                       so the projection derivative is copied directly */
                    for (int ibnd = 0; ibnd < nbnd__; ibnd++) {
                        for (int m = 0; m < mmax; m++) {
                            phi_hub_s_psi_deriv__(offset_in_hwf + m, ibnd) = phi_atomic_ds_psi__(offset_in_wf + m, ibnd);
                        }
                    }
                }
            }
        }
    }
}
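When full orthogonalization is enabled, the Hubbard orbitals are the Loewdin-orthogonalized atomic orbitals, phi_hub = phi_atomic * O^{-1/2}, and the routine assembles the derivative of the projection by the product rule. This restates the two accumulation steps above (ovlp__ holds the derivative of O^{-1/2} on entry and phi_atomic_ds_psi__ the derivative of the atomic projection):

\[
\frac{\partial}{\partial\mu}\langle \phi^{\mathrm{hub}}_{m} | \hat S | \psi_{n}\rangle
= \sum_{j}\left[
\frac{\partial \bigl(O^{-1/2}\bigr)_{m j}}{\partial\mu}\,\langle \phi^{\mathrm{at}}_{j}| \hat S |\psi_{n}\rangle
+ \bigl(O^{-1/2}\bigr)_{m j}\,\frac{\partial}{\partial\mu}\langle \phi^{\mathrm{at}}_{j}| \hat S |\psi_{n}\rangle
\right].
\]

Without full orthogonalization only the second term survives, which is the plain copy performed in the else branch.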
/* transform the derivative of the overlap matrix O into the derivative of O^{-1/2} */
void
compute_inv_sqrt_O_deriv(la::dmatrix<std::complex<double>>& O_deriv__, la::dmatrix<std::complex<double>>& evec_O__,
                         std::vector<double>& eval_O__, int nawf__)
{
    /* ... (rotation of O_deriv__ into the eigenbasis of O is elided in this listing) */
    for (int i = 0; i < nawf__; i++) {
        for (int j = 0; j < nawf__; j++) {
            O_deriv__(j, i) /= -(eval_O__[i] * std::sqrt(eval_O__[j]) + eval_O__[j] * std::sqrt(eval_O__[i]));
        }
    }
    /* ... (back-rotation to the original basis is elided in this listing) */
}
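The element-wise division above is the closed form for the derivative of the inverse square root of a Hermitian matrix. Differentiating X O X = 1 with X = O^{-1/2} and working in the eigenbasis O = U Lambda U^dagger gives

\[
\bigl(U^{\dagger}\,\partial X\,U\bigr)_{ij}
= -\,\frac{\bigl(U^{\dagger}\,\partial O\,U\bigr)_{ij}}
          {\lambda_i\sqrt{\lambda_j} + \lambda_j\sqrt{\lambda_i}},
\qquad X = O^{-1/2},\quad O = U\Lambda U^{\dagger},
\]

which is exactly the denominator used in the loop; the rotations with the eigenvectors evec_O__ are presumably performed in the parts of the routine elided from this listing.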
/* signature as given in the reference entries at the end of this listing */
void
Hubbard::compute_occupancies_derivatives(K_point<double>& kp__, Q_operator<double>& q_op__,
                                         sddk::mdarray<std::complex<double>, 5>& dn__)
{
    PROFILE("sirius::Hubbard::compute_occupancies_derivatives");

    /* select the memory type; the matching linear-algebra backend `la` used below is chosen
       in code elided from this listing */
    auto mt = sddk::memory_t::none;
    switch (ctx_.processing_unit()) {
        case sddk::device_t::CPU: {
            mt = sddk::memory_t::host;
            /* ... */
            break;
        }
        case sddk::device_t::GPU: {
            mt = sddk::memory_t::device;
            /* ... */
            break;
        }
    }

    auto alpha = std::complex<double>(kp__.weight(), 0.0);

    /* ... (the atomic orbitals phi_atomic, their S-projection phi_atomic_S and related objects
           used below are obtained here; elided in this listing) */

    auto num_ps_atomic_wf = ctx_.unit_cell().num_ps_atomic_wf();
    auto num_hubbard_wf   = ctx_.unit_cell().num_hubbard_wf();

    /* number of atomic wave-functions */
    int nawf = num_ps_atomic_wf.first;
    /* number of Hubbard wave-functions */
    int nhwf = num_hubbard_wf.first;

    RTE_ASSERT(nawf == phi_atomic.num_wf().get());
    RTE_ASSERT(nawf == phi_atomic_S.num_wf().get());

    if (ctx_.processing_unit() == sddk::device_t::GPU) {
        dn__.allocate(sddk::memory_t::device);
        /* ... */
    }

    std::unique_ptr<la::dmatrix<std::complex<double>>> inv_sqrt_O;
    std::unique_ptr<la::dmatrix<std::complex<double>>> evec_O;
    std::vector<double> eval_O;
    if (ctx_.cfg().hubbard().full_orthogonalization()) {
        /* ... (the atomic-orbital overlap matrix is built and inverse_sqrt() is called; elided) */
        inv_sqrt_O = std::move(std::get<0>(result));
        evec_O     = std::move(std::get<1>(result));
        eval_O     = std::get<2>(result);
    }

    /* <psi | S | phi_hub> for each spin channel */
    std::array<la::dmatrix<std::complex<double>>, 2> psi_s_phi_hub;
    for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
        /* ... (inner products; elided in this listing) */
    }

    /* temporary storage for the gradient of the atomic orbitals and for S applied to it */
    auto phi_atomic_tmp   = wave_function_factory(ctx_, kp__, phi_atomic.num_wf(), wf::num_mag_dims(0), false);
    auto s_phi_atomic_tmp = wave_function_factory(ctx_, kp__, phi_atomic_S.num_wf(), wf::num_mag_dims(0), false);
    auto mg1 = phi_atomic_tmp->memory_guard(mt);
    auto mg2 = s_phi_atomic_tmp->memory_guard(mt);

    /* <d phi_atomic / dr_x | S | psi> and <d phi_atomic / dr_x | S | phi_atomic> for the three Cartesian directions */
    std::array<std::array<la::dmatrix<std::complex<double>>, 2>, 3> grad_phi_atomic_s_psi;
    std::array<la::dmatrix<std::complex<double>>, 3> grad_phi_atomic_s_phi_atomic;

    auto& bp       = kp__.beta_projectors();
    auto bp_gen    = bp.make_generator(mt);
    auto bp_coeffs = bp_gen.prepare();

    for (int x = 0; x < 3; x++) {
        /* build the x-component of the gradient of the atomic orbitals in phi_atomic_tmp */
        for (int i = 0; i < nawf; i++) {
            /* ... (the enclosing loop over local G+k vectors igloc is elided in this listing) */
                auto gk = kp__.gkvec().template gkvec_cart<index_domain_t::local>(igloc);
                /* ... */
        }
        phi_atomic_tmp->copy_to(mt);
        /* apply the S operator to the gradient orbitals; the head of the call is elided in this listing: */
                bp_coeffs, *phi_atomic_tmp, &q_op__, *s_phi_atomic_tmp);

        if (ctx_.cfg().hubbard().full_orthogonalization()) {
            /* ... (<d phi_atomic | S | phi_atomic> for direction x; elided) */
        }
        for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
            /* ... (<d phi_atomic | S | psi> for direction x and spin ispn; elided) */
        }
    }

    /* <phi_atomic | S | psi> for each spin channel */
    std::array<la::dmatrix<std::complex<double>>, 2> phi_atomic_s_psi;
    if (ctx_.cfg().hubbard().full_orthogonalization()) {
        for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
            /* ... (inner products; elided) */
        }
    }

    /* generator for the position gradients of the beta-projectors
       (the bp_grad object itself is created in code elided from this listing) */
    auto bp_grad_gen    = bp_grad.make_generator(mt);
    auto bp_grad_coeffs = bp_grad_gen.prepare();

    for (int ichunk = 0; ichunk < kp__.beta_projectors().num_chunks(); ichunk++) {
        bool copy_back_innerb = sddk::is_device_memory(mt);

        bp_gen.generate(bp_coeffs, ichunk);
        auto& beta_chunk = bp_coeffs.beta_chunk_;

        /* <beta | phi_atomic> for the current chunk; the trailing arguments of the call are elided */
        auto beta_phi_atomic =
            inner_prod_beta<std::complex<double>>(ctx_.spla_context(), mt, ctx_.host_memory_t(), copy_back_innerb,
                                                  bp_coeffs, /* ... */);

        for (int x = 0; x < 3; x++) {
            bp_grad_gen.generate(bp_grad_coeffs, ichunk, x);

            /* <d beta / dr_x | phi_atomic> for the current chunk; the trailing arguments are elided */
            auto grad_beta_phi_atomic = inner_prod_beta<std::complex<double>>(
                ctx_.spla_context(), mt, ctx_.host_memory_t(), copy_back_innerb, bp_grad_coeffs, phi_atomic,
                /* ... */);

            for (int i = 0; i < beta_chunk.num_atoms_; i++) {
                /* ... (ja, the global index of the i-th atom of the chunk, is set here; elided) */
                if (ctx_.unit_cell().atom(ja).type().augment()) {
                    /* augmentation (Q-operator) contributions from |d beta><beta| and |beta><d beta|;
                       the heads of the two calls are elided in this listing: */
                            bp_grad_coeffs, beta_phi_atomic);
                    /* ... */
                            bp_coeffs, grad_beta_phi_atomic);
                }

                if (ctx_.cfg().hubbard().full_orthogonalization()) {
                    /* ... (ovlp is initialized with the remaining overlap-derivative contributions; elided) */
                    /* add <d phi_atomic | S | phi_atomic> + h.c. for the orbitals of atom ja */
                    auto& type = ctx_.unit_cell().atom(ja).type();
                    for (int xi = 0; xi < type.indexb_wfs().size(); xi++) {
                        int i = num_ps_atomic_wf.second[ja] + xi;
                        for (int j = 0; j < nawf; j++) {
                            ovlp(i, j) += grad_phi_atomic_s_phi_atomic[x](i, j);
                            ovlp(j, i) += std::conj(grad_phi_atomic_s_phi_atomic[x](i, j));
                        }
                    }
                    /* turn the overlap derivative into the derivative of O^{-1/2} */
                    compute_inv_sqrt_O_deriv(ovlp, *evec_O, eval_O, nawf);
                }

                for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                    /* ... (phi_atomic_ds_psi and phi_hub_s_psi_deriv are prepared here; the loop below sits
                           inside a loop over occupied bands ibnd that is elided in this listing) */
                    for (int xi = 0; xi < ctx_.unit_cell().atom(ja).type().indexb_wfs().size(); xi++) {
                        int i = num_ps_atomic_wf.second[ja] + xi;
                        phi_atomic_ds_psi(i, ibnd) += grad_phi_atomic_s_psi[x][ispn](i, ibnd);
                    }

                    build_phi_hub_s_psi_deriv(ctx_, kp__.num_occupied_bands(ispn), nawf, ovlp, *inv_sqrt_O,
                                              phi_atomic_s_psi[ispn], phi_atomic_ds_psi, num_ps_atomic_wf.second,
                                              num_hubbard_wf.second, phi_hub_s_psi_deriv);

                    for (int j = 0; j < num_hubbard_wf.first; j++) {
                        /* ... (the body of this loop, which weights phi_hub_s_psi_deriv over bands,
                               presumably by the band occupancies, is elided in this listing) */
                    }

                    if (ctx_.processing_unit() == sddk::device_t::GPU) {
                        phi_hub_s_psi_deriv.allocate(get_memory_pool(sddk::memory_t::device)).copy_to(sddk::memory_t::device);
                        psi_s_phi_hub[ispn].allocate(get_memory_pool(sddk::memory_t::device)).copy_to(sddk::memory_t::device);
                    }

                    /* accumulate into the (ispn, x, ja) block of dn__; the trailing leading-dimension
                       argument of the call is elided in this listing */
                    update_density_matrix_deriv(la, mt, num_hubbard_wf.first, kp__.num_occupied_bands(ispn),
                                                &alpha, phi_hub_s_psi_deriv, psi_s_phi_hub[ispn],
                                                dn__.at(mt, 0, 0, ispn, x, ja) /* , ... */);
                } /* loop over spins */
            }     /* loop over atoms of the chunk */
        }         /* loop over Cartesian directions x */
    }             /* loop over beta-projector chunks */

    if (ctx_.processing_unit() == sddk::device_t::GPU) {
        dn__.copy_to(sddk::memory_t::host);
        dn__.deallocate(sddk::memory_t::device);
    }
}
/* signature as given in the reference entries at the end of this listing */
void
Hubbard::compute_occupancies_stress_derivatives(K_point<double>& kp__, Q_operator<double>& q_op__,
                                                sddk::mdarray<std::complex<double>, 4>& dn__)
{
    PROFILE("sirius::Hubbard::compute_occupancies_stress_derivatives");

    /* select the memory type; the matching linear-algebra backend `la` used below is chosen
       in code elided from this listing */
    auto mt = sddk::memory_t::none;
    switch (ctx_.processing_unit()) {
        case sddk::device_t::CPU: {
            mt = sddk::memory_t::host;
            /* ... */
            break;
        }
        case sddk::device_t::GPU: {
            mt = sddk::memory_t::device;
            /* ... */
            break;
        }
    }

    auto alpha = std::complex<double>(kp__.weight(), 0.0);

    /* strain derivatives of the beta-projectors (bp_strain_deriv itself is created in code elided here) */
    auto bp_strain_gen    = bp_strain_deriv.make_generator();
    auto bp_strain_coeffs = bp_strain_gen.prepare();

    auto bp_gen    = kp__.beta_projectors().make_generator();
    auto bp_coeffs = bp_gen.prepare();

    const int lmax = ctx_.unit_cell().lmax();
    /* ... (storage for the real spherical harmonics rlm_g and their derivatives rlm_dg is allocated here; elided) */

    #pragma omp parallel for schedule(static)
    for (int igkloc = 0; igkloc < kp__.num_gkvec_loc(); igkloc++) {
        /* ... (Rlm(G+k) and dRlm/dr are computed for each local G+k vector; elided in this listing) */
    }

    auto num_ps_atomic_wf = ctx_.unit_cell().num_ps_atomic_wf();
    auto num_hubbard_wf   = ctx_.unit_cell().num_hubbard_wf();

    /* number of atomic wave-functions */
    int nawf = num_ps_atomic_wf.first;
    /* number of Hubbard wave-functions */
    int nhwf = num_hubbard_wf.first;

    RTE_ASSERT(nawf == phi_atomic.num_wf().get());
    RTE_ASSERT(nawf == phi_atomic_S.num_wf().get());
    RTE_ASSERT(nhwf == phi_hub_S.num_wf().get());

    auto dphi_atomic   = wave_function_factory(ctx_, kp__, phi_atomic.num_wf(), wf::num_mag_dims(0), false);
    auto s_dphi_atomic = wave_function_factory(ctx_, kp__, phi_atomic.num_wf(), wf::num_mag_dims(0), false);
    auto ds_phi_atomic = wave_function_factory(ctx_, kp__, phi_atomic.num_wf(), wf::num_mag_dims(0), false);

    /* <psi | S | phi_hub> for each spin channel; the head of the inner-product call is elided in this listing */
    std::array<la::dmatrix<std::complex<double>>, 2> psi_s_phi_hub;
    for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
        /* ... */
                psi_s_phi_hub[ispn], 0, 0);
    }

    std::unique_ptr<la::dmatrix<std::complex<double>>> inv_sqrt_O;
    std::unique_ptr<la::dmatrix<std::complex<double>>> evec_O;
    std::vector<double> eval_O;
    if (ctx_.cfg().hubbard().full_orthogonalization()) {
        /* ... (the atomic-orbital overlap matrix is built and inverse_sqrt() is called; elided) */
        inv_sqrt_O = std::move(std::get<0>(result));
        evec_O     = std::move(std::get<1>(result));
        eval_O     = std::get<2>(result);
    }

    /* <phi_atomic | S | psi> for each spin channel */
    std::array<la::dmatrix<std::complex<double>>, 2> phi_atomic_s_psi;
    if (ctx_.cfg().hubbard().full_orthogonalization()) {
        for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
            /* ... (inner products; elided) */
        }
    }

    auto mg1 = dphi_atomic->memory_guard(mt);
    auto mg2 = s_dphi_atomic->memory_guard(mt);
    auto mg3 = ds_phi_atomic->memory_guard(mt);
    for (int nu = 0; nu < 3; nu++) {
        for (int mu = 0; mu < 3; mu++) {
            /* strain derivative of the atomic orbitals for the (nu, mu) component */
            wavefunctions_strain_deriv(ctx_, kp__, *dphi_atomic, rlm_g, rlm_dg, nu, mu);
            dphi_atomic->copy_to(mt);

            /* S|d phi_atomic> and the strain derivative of S applied to |phi_atomic> are computed next;
               the heads of the two calls are elided in this listing (the second one apparently being
               apply_S_operator_strain_deriv, matching the reference entry below): */
                    bp_gen, bp_coeffs, *dphi_atomic, &q_op__,
            /* ... */
                    phi_atomic, q_op__, *ds_phi_atomic);

            if (ctx_.cfg().hubbard().full_orthogonalization()) {
                /* ... (ovlp and the strain derivative tmp of the atomic-orbital overlap are prepared here; elided) */
                for (int i = 0; i < nawf; i++) {
                    for (int j = 0; j < nawf; j++) {
                        ovlp(i, j) += tmp(i, j) + std::conj(tmp(j, i));
                    }
                }
                /* turn the overlap derivative into the derivative of O^{-1/2} */
                compute_inv_sqrt_O_deriv(ovlp, *evec_O, eval_O, nawf);
            }

            for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                /* ... (phi_atomic_ds_psi, dphi_atomic_s_psi and phi_hub_s_psi_deriv are prepared here; the
                       loop below sits inside a loop over occupied bands ibnd that is elided in this listing) */
                for (int j = 0; j < nawf; j++) {
                    phi_atomic_ds_psi(j, ibnd) += dphi_atomic_s_psi(j, ibnd);
                }

                build_phi_hub_s_psi_deriv(ctx_, kp__.num_occupied_bands(ispn), nawf, ovlp, *inv_sqrt_O,
                                          phi_atomic_s_psi[ispn], phi_atomic_ds_psi, num_ps_atomic_wf.second,
                                          num_hubbard_wf.second, phi_hub_s_psi_deriv);

                for (int j = 0; j < num_hubbard_wf.first; j++) {
                    /* ... (the body of this loop, which weights phi_hub_s_psi_deriv over bands,
                           presumably by the band occupancies, is elided in this listing) */
                }

                if (ctx_.processing_unit() == sddk::device_t::GPU) {
                    psi_s_phi_hub[ispn].allocate(get_memory_pool(sddk::memory_t::device)).copy_to(sddk::memory_t::device);
                    phi_hub_s_psi_deriv.allocate(get_memory_pool(sddk::memory_t::device)).copy_to(sddk::memory_t::device);
                }

                /* accumulate into the (ispn, 3 * nu + mu) block of dn__; the trailing leading-dimension
                   argument of the call is elided in this listing */
                update_density_matrix_deriv(la, mt, num_hubbard_wf.first, kp__.num_occupied_bands(ispn),
                                            &alpha, phi_hub_s_psi_deriv, psi_s_phi_hub[ispn],
                                            dn__.at(mt, 0, 0, ispn, 3 * nu + mu) /* , ... */);
            } /* loop over spins */
        }     /* loop over mu */
    }         /* loop over nu */

    if (ctx_.processing_unit() == sddk::device_t::GPU) {
        dn__.copy_to(sddk::memory_t::host);
        /* ... */
    }
}
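As in the displacement case, each (nu, mu) strain component gets its own block of the occupation-matrix derivative; reading off the indexing used above,

\[
\frac{\partial n^{\sigma}_{m m'}}{\partial \varepsilon_{\nu\mu}}
\;\longrightarrow\; \mathtt{dn\_\_}(m,\, m',\, \sigma,\, 3\nu + \mu),
\qquad \nu,\mu \in \{0,1,2\},
\]

i.e. the nine strain components are flattened into the last index of the four-dimensional dn__ array.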
Contains declaration and implementation of sirius::Beta_projectors_base class.
Compute gradient of beta-projectors over atomic positions.
void compute_occupancies_stress_derivatives(K_point< double > &kp__, Q_operator< double > &q_op__, sddk::mdarray< std::complex< double >, 4 > &dn__)
Compute derivatives of the occupancy matrix w.r.t. lattice strain.
void compute_occupancies_derivatives(K_point< double > &kp__, Q_operator< double > &q_op__, sddk::mdarray< std::complex< double >, 5 > &dn__)
Compute the occupancy derivatives with respect to atomic displacements (a minimal call sketch follows this reference list).
auto const & atomic_wave_functions() const
Return the initial atomic orbitals used to compute the hubbard wave functions.
double band_occupancy(int j__, int ispn__) const
Get band occupancy.
auto const & atomic_wave_functions_S() const
double weight() const
Return weight of k-point.
auto const & hubbard_wave_functions_S() const
Return the actual hubbard wave functions used in the calculations.
int num_occupied_bands(int ispn__=-1) const
Get the number of occupied bands for each spin channel.
int num_gkvec_loc() const
Local number of G+k vectors in case of flat distribution.
void apply(sddk::memory_t mem__, int chunk__, int ispn_block__, wf::Wave_functions< T > &op_phi__, wf::band_range br__, beta_projectors_coeffs_t< T > const &beta_coeffs__, sddk::matrix< F > const &beta_phi__) const
Apply chunk of beta-projectors to all wave functions.
auto host_memory_t() const
Type of the host memory for arrays used in linear algebra operations.
int num_spins() const
Number of spin components.
Multidimensional array with the column-major (Fortran) order.
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
Describe a range of bands.
Describe a range of spins.
Contains declaration and partial implementation of sirius::Hubbard class.
Compute inverse square root of the matrix.
Memory management functions and classes.
bool is_device_memory(memory_t mem__)
Check if this is a valid device memory (memory, accessible by the device).
auto inverse_sqrt(la::dmatrix< T > &A__, int N__)
Compute inverse square root of the matrix.
lib_t
Type of linear algebra backend library.
gpublas
GPU BLAS (cuBlas or ROCblas)
void unitary_similarity_transform(int kind__, dmatrix< T > &A__, dmatrix< T > const &U__, int n__)
auto spherical_coordinates(vector< double > vc)
Transform Cartesian coordinates [x,y,z] to spherical coordinates [r,theta,phi].
int lmax(int lmmax__)
Get maximum orbital quantum number by the maximum lm index.
int lmmax(int lmax)
Maximum number of lm combinations for a given lmax.
void dRlm_dr(int lmax__, r3::vector< double > &r__, sddk::mdarray< double, 2 > &data__, bool divide_by_r__=true)
Compute the derivatives of real spherical harmonics over the components of a Cartesian vector.
void spherical_harmonics(int lmax, double theta, double phi, std::complex< double > *ylm)
Optimized implementation of complex spherical harmonics.
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > inner(::spla::Context &spla_ctx__, sddk::memory_t mem__, spin_range spins__, W const &wf_i__, band_range br_i__, Wave_functions< T > const &wf_j__, band_range br_j__, la::dmatrix< F > &result__, int irow0__, int jcol0__)
Compute inner product between the two sets of wave-functions.
Namespace of the SIRIUS library.
strong_type< int, struct __rf_index_tag > rf_index
Radial function index.
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
void apply_S_operator_strain_deriv(sddk::memory_t mem__, int comp__, Beta_projector_generator< double > &bp__, beta_projectors_coeffs_t< double > &bp_coeffs__, Beta_projector_generator< double > &bp_strain_deriv__, beta_projectors_coeffs_t< double > &bp_strain_deriv_coeffs__, wf::Wave_functions< double > &phi__, Q_operator< double > &q_op__, wf::Wave_functions< double > &ds_phi__)
Apply strain derivative of S-operator to all scalar functions.
static const int ia
Global index of atom.
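A minimal call sketch for the two drivers documented above, assuming a Hubbard object `hubbard`, a Simulation_context `ctx`, a K_point<double> `kp` and a Q_operator<double> `q_op` are already available. The leading dimensions of the result arrays are an assumption here, chosen only to match the indexing dn(..., ispn, x, ja) and dn(..., ispn, 3*nu+mu) seen in the listing:

/* hypothetical sizes taken from the simulation objects */
int nhwf    = ctx.unit_cell().num_hubbard_wf().first;
int n_spins = ctx.num_spins();
int n_atoms = ctx.unit_cell().num_atoms();

/* occupation-matrix derivatives w.r.t. atomic displacements: (m, m', spin, direction, atom);
   the (m, m') block sizes are assumed to be nhwf x nhwf */
sddk::mdarray<std::complex<double>, 5> dn_r(nhwf, nhwf, n_spins, 3, n_atoms);
dn_r.zero();
hubbard.compute_occupancies_derivatives(kp, q_op, dn_r);

/* occupation-matrix derivatives w.r.t. the nine strain components: (m, m', spin, 3*nu + mu) */
sddk::mdarray<std::complex<double>, 4> dn_eps(nhwf, nhwf, n_spins, 9);
dn_eps.zero();
hubbard.compute_occupancies_stress_derivatives(kp, q_op, dn_eps);

Both routines handle the optional GPU off-load internally (allocation, copy-in and copy-back of dn), so the arrays only need to be valid in host memory at the call site.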