SIRIUS 7.5.0
Electronic structure library and applications
density.cpp
Go to the documentation of this file.
1// Copyright (c) 2013-2019 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5
6// the following conditions are met:
7//
8// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
9// following disclaimer.
10// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
11// and the following disclaimer in the documentation and/or other materials provided with the distribution.
12//
13// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
14// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
15// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
16// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
17// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
18// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
19// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20
21/** \file density.cpp
22 *
23 * \brief Contains implementation of sirius::Density class.
24 */
25
26#include "core/profiler.hpp"
35#include "lapw/sum_fg_fl_yg.hpp"
37#include "density.hpp"
38
39namespace sirius {
40
41#if defined(SIRIUS_GPU)
42void
43update_density_rg_1_real_gpu(int size__, float const* psi_rg__, float wt__, float* density_rg__)
44{
45 update_density_rg_1_real_gpu_float(size__, psi_rg__, wt__, density_rg__);
46}
47
48void
49update_density_rg_1_real_gpu(int size__, double const* psi_rg__, double wt__, double* density_rg__)
50{
51 update_density_rg_1_real_gpu_double(size__, psi_rg__, wt__, density_rg__);
52}
53
54void
55update_density_rg_1_complex_gpu(int size__, std::complex<float> const* psi_rg__, float wt__, float* density_rg__)
56{
57 update_density_rg_1_complex_gpu_float(size__, psi_rg__, wt__, density_rg__);
58}
59
60void
61update_density_rg_1_complex_gpu(int size__, std::complex<double> const* psi_rg__, double wt__, double* density_rg__)
62{
63 update_density_rg_1_complex_gpu_double(size__, psi_rg__, wt__, density_rg__);
64}
65
66void
67update_density_rg_2_gpu(int size__, std::complex<float> const* psi_rg_up__, std::complex<float> const* psi_rg_dn__,
68 float wt__, float* density_x_rg__, float* density_y_rg__)
69{
70 update_density_rg_2_gpu_float(size__, psi_rg_up__, psi_rg_dn__, wt__, density_x_rg__, density_y_rg__);
71}
72
73void
74update_density_rg_2_gpu(int size__, std::complex<double> const* psi_rg_up__, std::complex<double> const* psi_rg_dn__,
75 double wt__, double* density_x_rg__, double* density_y_rg__)
76{
77 update_density_rg_2_gpu_double(size__, psi_rg_up__, psi_rg_dn__, wt__, density_x_rg__, density_y_rg__);
78}
79#endif
80
82 : Field4D(ctx__, lmax_t(ctx__.lmax_rho()), {ctx__.periodic_function_ptr("rho"), ctx__.periodic_function_ptr("magz"),
83 ctx__.periodic_function_ptr("magx"), ctx__.periodic_function_ptr("magy")})
84 , unit_cell_(ctx_.unit_cell())
85{
86 PROFILE("sirius::Density");
87
88 if (!ctx_.initialized()) {
89 RTE_THROW("Simulation_context is not initialized");
90 }
91
92 using spf = Smooth_periodic_function<double>;
93
94 /* allocate charge density and magnetization on a coarse grid */
95 for (int i = 0; i < ctx_.num_mag_dims() + 1; i++) {
96 rho_mag_coarse_[i] = std::make_unique<spf>(ctx_.spfft_coarse<double>(), ctx_.gvec_coarse_fft_sptr());
97 }
98
99 /* core density of the pseudopotential method */
100 if (!ctx_.full_potential()) {
101 rho_pseudo_core_ = std::make_unique<spf>(ctx_.spfft<double>(), ctx_.gvec_fft_sptr());
102 }
103
104 l_by_lm_ = sf::l_by_lm(ctx_.lmax_rho());
105
106 density_matrix_ = std::make_unique<density_matrix_t>(ctx_.unit_cell(), ctx_.num_mag_comp());
107
108 if (ctx_.hubbard_correction()) {
109 occupation_matrix_ = std::make_unique<Occupation_matrix>(ctx_);
110 }
111
112 if (unit_cell_.num_paw_atoms()) {
113 paw_density_ = std::make_unique<PAW_density<double>>(unit_cell_);
114 }
115
116 update();
117}
118
119void
121{
122 PROFILE("sirius::Density::update");
123
124 if (!ctx_.full_potential()) {
125 rho_pseudo_core_->zero();
126 bool is_empty{true};
127 for (int iat = 0; iat < unit_cell_.num_atom_types(); iat++) {
128 is_empty &= unit_cell_.atom_type(iat).ps_core_charge_density().empty();
129 }
130 if (!is_empty) {
131 generate_pseudo_core_charge_density();
132 }
133 }
134}
135
136/// Find the total leakage of the core states out of the muffin-tins
137double
139{
140 double sum{0};
141 for (int ic = 0; ic < unit_cell_.num_atom_symmetry_classes(); ic++) {
143 }
144 return sum;
145}
146
147void
149{
150 PROFILE("sirius::Density::initial_density");
151
152 zero();
153
154 if (ctx_.full_potential()) {
155 initial_density_full_pot();
156 } else {
157 initial_density_pseudo();
158
160
162
163 if (occupation_matrix_) {
164 occupation_matrix_->init();
165 }
166 }
167 if (ctx_.use_symmetry()) {
168 symmetrize_field4d(*this);
169 }
170}
171
172void
173Density::initial_density_pseudo()
174{
175 /* get lenghts of all G shells */
176 auto q = ctx_.gvec().shells_len();
177 /* get form-factors for all G shells */
178 // TODO: MPI parallelise over G-shells
179 auto const ff = ctx_.ri().ps_rho_->values(q, ctx_.comm());
180 /* make Vloc(G) */
181 auto v = make_periodic_function<index_domain_t::local>(ctx_.unit_cell(), ctx_.gvec(),
182 ctx_.phase_factors_t(), ff);
183
184 if (env::print_checksum()) {
185 auto z1 = sddk::mdarray<std::complex<double>, 1>(&v[0], ctx_.gvec().count()).checksum();
186 ctx_.comm().allreduce(&z1, 1);
187 print_checksum("rho_pw_init", z1, ctx_.out());
188 }
189 std::copy(v.begin(), v.end(), &rho().rg().f_pw_local(0));
190
191 if (env::print_hash()) {
192 auto h = sddk::mdarray<std::complex<double>, 1>(&v[0], ctx_.gvec().count()).hash();
193 print_hash("rho_pw_init", h, ctx_.out());
194 }
195
196 double charge = rho().rg().f_0().real() * unit_cell_.omega();
197
198 if (std::abs(charge - unit_cell_.num_valence_electrons()) > 1e-6) {
199 std::stringstream s;
200 s << "wrong initial charge density" << std::endl
201 << " integral of the density : " << std::setprecision(12) << charge << std::endl
202 << " target number of electrons : " << std::setprecision(12) << unit_cell_.num_valence_electrons();
203 if (ctx_.comm().rank() == 0) {
204 RTE_WARNING(s);
205 }
206 }
207 rho().rg().fft_transform(1);
208 if (env::print_hash()) {
209 auto h = rho().rg().values().hash();
210 print_hash("rho_rg_init", h, ctx_.out());
211 }
212
213 /* remove possible negative noise */
214 for (int ir = 0; ir < ctx_.spfft<double>().local_slice_size(); ir++) {
215 rho().rg().value(ir) = std::max(rho().rg().value(ir), 0.0);
216 }
217 /* renormalize charge */
218 normalize();
219
220 if (env::print_checksum()) {
221 auto cs = rho().rg().checksum_rg();
222 print_checksum("rho_rg_init", cs, ctx_.out());
223 }
224
225 /* initialize the magnetization */
226 if (ctx_.num_mag_dims()) {
227 auto Rmt = unit_cell_.find_mt_radii(1, true);
228
229 /* auxiliary weight function; the volume integral of this function is equal to 1 */
230 auto w = [](double R, double x) {
231 double norm = 3.1886583903476735 * std::pow(R, 3);
232
233 return (1 - std::pow(x / R, 2)) * std::exp(x / R) / norm;
234 };
235
236 for (int ia = 0; ia < unit_cell_.num_atoms(); ia++) {
237 auto& atom_to_grid_map = ctx_.atoms_to_grid_idx_map(ia);
238
239 auto v = unit_cell_.atom(ia).vector_field();
240
241 for (auto coord : atom_to_grid_map) {
242 int ir = coord.first;
243 double r = coord.second;
244 double f = w(Rmt[unit_cell_.atom(ia).type_id()], r);
245 mag(0).rg().value(ir) += v[2] * f;
246 if (ctx_.num_mag_dims() == 3) {
247 mag(1).rg().value(ir) += v[0] * f;
248 mag(2).rg().value(ir) += v[1] * f;
249 }
250 }
251 }
252 }
253 this->fft_transform(-1);
254
255 if (env::print_checksum()) {
256 for (int i = 0; i < ctx_.num_mag_dims() + 1; i++) {
257 auto cs = component(i).rg().checksum_rg();
258 auto cs1 = component(i).rg().checksum_pw();
259 std::stringstream s;
260 s << "component[" << i << "]_rg";
261 print_checksum(s.str(), cs, ctx_.out());
262 std::stringstream s1;
263 s1 << "component[" << i << "]_pw";
264 print_checksum(s1.str(), cs1, ctx_.out());
265 }
266 }
267}
268
269void
270Density::initial_density_full_pot()
271{
272 /* initialize smooth density of free atoms */
273 for (int iat = 0; iat < unit_cell_.num_atom_types(); iat++) {
275 }
276
277 /* compute radial integrals */
278 Radial_integrals_rho_free_atom ri(ctx_.unit_cell(), ctx_.pw_cutoff(), 40);
279
280 /* compute contribution from free atoms to the interstitial density */
281 auto v = make_periodic_function<index_domain_t::local>(ctx_.unit_cell(), ctx_.gvec(), ctx_.phase_factors_t(),
282 [&ri](int iat, double g) { return ri.value(iat, g); });
283
284 /* initialize density of free atoms (not smoothed) */
285 for (int iat = 0; iat < unit_cell_.num_atom_types(); iat++) {
287 }
288
289 if (env::print_checksum()) {
290 auto z = sddk::mdarray<std::complex<double>, 1>(&v[0], ctx_.gvec().count()).checksum();
291 ctx_.comm().allreduce(&z, 1);
292 print_checksum("rho_pw", z, ctx_.out());
293 }
294
295 /* set plane-wave coefficients of the charge density */
296 std::copy(v.begin(), v.end(), &rho().rg().f_pw_local(0));
297 /* convert charge density to real space mesh */
298 rho().rg().fft_transform(1);
299
300 if (env::print_checksum()) {
301 auto cs = rho().rg().checksum_rg();
302 print_checksum("rho_rg", cs, ctx_.out());
303 }
304
305 /* remove possible negative noise */
306 for (int ir = 0; ir < ctx_.spfft<double>().local_slice_size(); ir++) {
307 rho().rg().value(ir) = std::max(0.0, rho().rg().value(ir));
308 }
309
310 /* set Y00 component of charge density */
311 for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
312 int nmtp = ctx_.unit_cell().atom(ia).num_mt_points();
313
314 for (int ir = 0; ir < nmtp; ir++) {
315 double x = ctx_.unit_cell().atom(ia).radial_grid(ir);
316 rho().mt()[ia](0, ir) = unit_cell_.atom(ia).type().free_atom_density(x) / y00;
317 }
318 }
319
320 int lmax = ctx_.lmax_rho();
321 int lmmax = sf::lmmax(lmax);
322
323 auto l_by_lm = sf::l_by_lm(lmax);
324
325 std::vector<std::complex<double>> zil(lmax + 1);
326 for (int l = 0; l <= lmax; l++) {
327 zil[l] = std::pow(std::complex<double>(0, 1), l);
328 }
329
330 /* compute boundary value at MT sphere from the plane-wave expansion */
331 auto gvec_ylm = generate_gvec_ylm(ctx_, lmax);
332
333 auto sbessel_mt = generate_sbessel_mt(ctx_, lmax);
334
335 auto flm = sum_fg_fl_yg(ctx_, lmax, v.at(sddk::memory_t::host), sbessel_mt, gvec_ylm);
336
337 /* this is the difference between the value of periodic charge density at MT boundary and
338 a value of the atom's free density at the boundary */
339 for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
340 double R = ctx_.unit_cell().atom(ia).mt_radius();
341 double c = unit_cell_.atom(ia).type().free_atom_density(R) / y00;
342 flm(0, ia) -= c;
343 }
344
345 /* match density at MT */
346 for (int iat = 0; iat < ctx_.unit_cell().num_atom_types(); iat++) {
347 sddk::mdarray<double, 2> rRl(ctx_.unit_cell().max_num_mt_points(), lmax + 1);
348 double R = ctx_.unit_cell().atom_type(iat).mt_radius();
349 int nmtp = ctx_.unit_cell().atom_type(iat).num_mt_points();
350
351 #pragma omp parallel for default(shared)
352 for (int l = 0; l <= lmax; l++) {
353 for (int ir = 0; ir < nmtp; ir++) {
354 rRl(ir, l) = std::pow(ctx_.unit_cell().atom_type(iat).radial_grid(ir) / R, 2);
355 }
356 }
357 #pragma omp parallel for default(shared)
358 for (int i = 0; i < unit_cell_.atom_type(iat).num_atoms(); i++) {
359 int ia = unit_cell_.atom_type(iat).atom_id(i);
360 std::vector<double> glm(lmmax);
361 SHT::convert(lmax, &flm(0, ia), &glm[0]);
362 for (int lm = 0; lm < lmmax; lm++) {
363 int l = l_by_lm[lm];
364 for (int ir = 0; ir < nmtp; ir++) {
365 rho().mt()[ia](lm, ir) += glm[lm] * rRl(ir, l);
366 }
367 }
368 }
369 }
370
371 /* normalize charge density */
372 normalize();
373
375
376 // FILE* fout = fopen("rho.dat", "w");
377 // for (int i = 0; i <= 10000; i++) {
378 // r3::vector<double> v = (i / 10000.0) * r3::vector<double>({10.26, 10.26, 10.26});
379 // double val = rho().value(v);
380 // fprintf(fout, "%18.12f %18.12f\n", v.length(), val);
381 //}
382 // fclose(fout);
383
384 // FILE* fout2 = fopen("rho_rg.dat", "w");
385 // for (int i = 0; i <= 10000; i++) {
386 // r3::vector<double> v = (i / 10000.0) * r3::vector<double>({10.26, 10.26, 10.26});
387 // double val = rho().value_rg(v);
388 // fprintf(fout2, "%18.12f %18.12f\n", v.length(), val);
389 //}
390 // fclose(fout2);
391
392 /* initialize the magnetization */
393 if (ctx_.num_mag_dims()) {
394 for (auto it : unit_cell_.spl_num_atoms()) {
395 int ia = it.i;
396 auto v = unit_cell_.atom(ia).vector_field();
397 auto len = v.length();
398
399 int nmtp = unit_cell_.atom(ia).num_mt_points();
400 Spline<double> rho_s(unit_cell_.atom(ia).type().radial_grid());
401 double R = unit_cell_.atom(ia).mt_radius();
402 for (int ir = 0; ir < nmtp; ir++) {
403 double x = unit_cell_.atom(ia).type().radial_grid(ir);
404 rho_s(ir) = this->rho().mt()[ia](0, ir) * y00 *
405 (1 - 3 * std::pow(x / R, 2) + 2 * std::pow(x / R, 3));
406 }
407
408 /* maximum magnetization which can be achieved if we smooth density towards MT boundary */
409 double q = fourpi * rho_s.interpolate().integrate(2);
410
411 /* if very strong initial magnetization is given */
412 if (q < len) {
413 /* renormalize starting magnetization */
414 for (int x : {0, 1, 2}) {
415 v[x] *= (q / len);
416 }
417 len = q;
418 }
419
420 if (len > 1e-8) {
421 for (int ir = 0; ir < nmtp; ir++) {
422 mag(0).mt()[ia](0, ir) = rho_s(ir) * v[2] / q / y00;
423 }
424 if (ctx_.num_mag_dims() == 3) {
425 for (int ir = 0; ir < nmtp; ir++) {
426 mag(1).mt()[ia](0, ir) = rho_s(ir) * v[0] / q / y00;
427 mag(2).mt()[ia](0, ir) = rho_s(ir) * v[1] / q / y00;
428 }
429 }
430 }
431 }
432 }
433}
434
435void
437{
438 for (int ipaw = 0; ipaw < unit_cell_.num_paw_atoms(); ipaw++) {
440
441 auto& dm = density_matrix(ia);
442 dm.zero();
443
444 auto& atom = unit_cell_.atom(ia);
445 auto& atom_type = atom.type();
446
447 int nbf = atom_type.mt_basis_size();
448
449 auto& occupations = atom_type.paw_wf_occ();
450
451 /* magnetization vector */
452 auto magn = atom.vector_field();
453
454 for (int xi = 0; xi < nbf; xi++) {
455 auto& basis_func_index_dsc = atom_type.indexb()[xi];
456
457 int rad_func_index = basis_func_index_dsc.idxrf;
458
459 double occ = occupations[rad_func_index];
460
461 int l = basis_func_index_dsc.am.l();
462
463 switch (ctx_.num_mag_dims()) {
464 case 0: {
465 dm(xi, xi, 0) = occ / double(2 * l + 1);
466 break;
467 }
468
469 case 3:
470 case 1: {
471 double nm = (std::abs(magn[2]) < 1.0) ? magn[2] : std::copysign(1, magn[2]);
472 dm(xi, xi, 0) = 0.5 * (1.0 + nm) * occ / double(2 * l + 1);
473 dm(xi, xi, 1) = 0.5 * (1.0 - nm) * occ / double(2 * l + 1);
474 break;
475 }
476 }
477 }
478 }
479}
480
481void
483{
484 auto ia_paw = ctx_.unit_cell().spl_num_paw_atoms(ialoc__);
485 auto ia = ctx_.unit_cell().paw_atom_index(ia_paw);
486
487 auto& atom_type = ctx_.unit_cell().atom(ia).type();
488
489 auto lmax = atom_type.indexr().lmax();
490
491 auto l_by_lm = sf::l_by_lm(2 * lmax);
492
493 /* get gaunt coefficients */
495
496 paw_density_->zero(ia);
497
498 /* get radial grid to divide density over r^2 */
499 auto& grid = atom_type.radial_grid();
500
501 auto& paw_ae_wfs = atom_type.ae_paw_wfs_array();
502 auto& paw_ps_wfs = atom_type.ps_paw_wfs_array();
503
504 auto dm = this->density_matrix_aux(atom_index_t::global(ia));
505
506 for (int imagn = 0; imagn < ctx_.num_mag_dims() + 1; imagn++) {
507 auto& ae_dens = paw_density_->ae_density(imagn, ia);
508 auto& ps_dens = paw_density_->ps_density(imagn, ia);
509
510 /* iterate over local basis functions (or over lm1 and lm2) */
511 for (int xi2 = 0; xi2 < atom_type.indexb().size(); xi2++) {
512 int lm2 = atom_type.indexb(xi2).lm;
513 int irb2 = atom_type.indexb(xi2).idxrf;
514
515 for (int xi1 = 0; xi1 <= xi2; xi1++) {
516 int lm1 = atom_type.indexb(xi1).lm;
517 int irb1 = atom_type.indexb(xi1).idxrf;
518
519 /* get num of non-zero GC */
520 int num_non_zero_gc = GC.num_gaunt(lm1, lm2);
521
522 double diag_coef = (xi1 == xi2) ? 1.0 : 2.0;
523
524 auto idx = packed_index(xi1, xi2);
525
526 /* add nonzero coefficients */
527 for (int inz = 0; inz < num_non_zero_gc; inz++) {
528 auto& lm3coef = GC.gaunt(lm1, lm2, inz);
529
530 /* iterate over radial points */
531 for (int irad = 0; irad < grid.num_points(); irad++) {
532
533 /* we need to divide density over r^2 since wave functions are stored multiplied by r */
534 double inv_r2 = diag_coef / (grid[irad] * grid[irad]);
535
536 /* calculate unified density/magnetization
537 * dm_ij * GauntCoef * ( phi_i phi_j + Q_ij) */
538 ae_dens(lm3coef.lm3, irad) +=
539 dm(idx, imagn) * inv_r2 * lm3coef.coef * paw_ae_wfs(irad, irb1) * paw_ae_wfs(irad, irb2);
540 ps_dens(lm3coef.lm3, irad) +=
541 dm(idx, imagn) * inv_r2 * lm3coef.coef *
542 (paw_ps_wfs(irad, irb1) * paw_ps_wfs(irad, irb2) +
543 atom_type.q_radial_function(irb1, irb2, l_by_lm[lm3coef.lm3])(irad));
544 }
545 }
546 }
547 }
548 }
549}
550
551void
553{
554 if (!unit_cell_.num_paw_atoms()) {
555 return;
556 }
557
558 PROFILE("sirius::Density::generate_paw_density");
559
560 #pragma omp parallel for
561 for (auto it : unit_cell_.spl_num_paw_atoms()) {
563 }
564}
565
566/// Compute non-magnetic or up- or dn- contribution of the wave-functions to the charge density.
567template <typename T>
568static void
569add_k_point_contribution_rg_collinear(fft::spfft_transform_type<T>& fft__, int ispn__, T w__, T const* inp_wf__, int nr__,
570 bool gamma__, sddk::mdarray<T, 2>& density_rg__)
571{
572 /* transform to real space */
573 fft__.backward(inp_wf__, fft__.processing_unit());
574
575 /* location of the real-space wave-functions psi(r) */
576 auto data_ptr = fft__.space_domain_data(fft__.processing_unit());
577
578 switch (fft__.processing_unit()) {
579 case SPFFT_PU_HOST: {
580 if (gamma__) {
581 #pragma omp parallel for
582 for (int ir = 0; ir < nr__; ir++) {
583 density_rg__(ir, ispn__) += w__ * std::pow(data_ptr[ir], 2);
584 }
585 } else {
586 auto data = reinterpret_cast<std::complex<T>*>(data_ptr);
587 #pragma omp parallel for
588 for (int ir = 0; ir < nr__; ir++) {
589 auto z = data[ir];
590 density_rg__(ir, ispn__) += w__ * (std::pow(z.real(), 2) + std::pow(z.imag(), 2));
591 }
592 }
593 break;
594 }
595 case SPFFT_PU_GPU: {
596#if defined(SIRIUS_GPU)
597 if (gamma__) {
598 update_density_rg_1_real_gpu(nr__, data_ptr, w__, density_rg__.at(sddk::memory_t::device, 0, ispn__));
599 } else {
600 auto data = reinterpret_cast<std::complex<T>*>(data_ptr);
601 update_density_rg_1_complex_gpu(nr__, data, w__, density_rg__.at(sddk::memory_t::device, 0, ispn__));
602 }
603#endif
604 break;
605 }
606 }
607}
608
609/// Compute contribution to density and megnetisation from the 2-component spinor wave-functions.
610template <typename T>
611static
612void add_k_point_contribution_rg_noncollinear(fft::spfft_transform_type<T>& fft__, T w__, T const* inp_wf_up__,
613 T const* inp_wf_dn__, int nr__, sddk::mdarray<std::complex<T>, 1>& psi_r_up__, sddk::mdarray<T, 2>& density_rg__)
614{
615 /* location of the real-space wave-functions psi(r) */
616 auto data_ptr = fft__.space_domain_data(fft__.processing_unit());
617
618 /* transform up- component to real space */
619 fft__.backward(inp_wf_up__, fft__.processing_unit());
620
621 /* this is a non-collinear case, so the wave-functions and FFT buffer are complex */
622 switch (fft__.processing_unit()) {
623 case SPFFT_PU_HOST: {
624 auto inp = reinterpret_cast<std::complex<T>*>(data_ptr);
625 std::copy(inp, inp + nr__, psi_r_up__.at(sddk::memory_t::host));
626 break;
627 }
628 case SPFFT_PU_GPU: {
629 acc::copy(psi_r_up__.at(sddk::memory_t::device), reinterpret_cast<std::complex<T>*>(data_ptr), nr__);
630 break;
631 }
632 }
633
634 /* transform to real space */
635 fft__.backward(inp_wf_dn__, fft__.processing_unit());
636
637 /* alias for dn- component of wave-functions */
638 auto psi_r_dn = reinterpret_cast<std::complex<T>*>(data_ptr);
639 auto& psi_r_up = psi_r_up__;
640
641 switch (fft__.processing_unit()) {
642 case SPFFT_PU_HOST: {
643 #pragma omp parallel for
644 for (int ir = 0; ir < nr__; ir++) {
645 auto r0 = (std::pow(psi_r_up[ir].real(), 2) + std::pow(psi_r_up[ir].imag(), 2)) * w__;
646 auto r1 = (std::pow(psi_r_dn[ir].real(), 2) + std::pow(psi_r_dn[ir].imag(), 2)) * w__;
647
648 auto z2 = psi_r_up[ir] * std::conj(psi_r_dn[ir]) * std::complex<T>(w__, 0);
649
650 density_rg__(ir, 0) += r0;
651 density_rg__(ir, 1) += r1;
652 density_rg__(ir, 2) += 2.0 * std::real(z2);
653 density_rg__(ir, 3) -= 2.0 * std::imag(z2);
654 }
655 break;
656 }
657 case SPFFT_PU_GPU: {
658#ifdef SIRIUS_GPU
659 /* add up-up contribution */
660 update_density_rg_1_complex_gpu(nr__, psi_r_up.at(sddk::memory_t::device), w__,
661 density_rg__.at(sddk::memory_t::device, 0, 0));
662 /* add dn-dn contribution */
663 update_density_rg_1_complex_gpu(nr__, psi_r_dn, w__, density_rg__.at(sddk::memory_t::device, 0, 1));
664 /* add off-diagonal contribution */
665 update_density_rg_2_gpu(nr__, psi_r_up.at(sddk::memory_t::device), psi_r_dn, w__,
666 density_rg__.at(sddk::memory_t::device, 0, 2),
667 density_rg__.at(sddk::memory_t::device, 0, 3));
668#endif
669 break;
670 }
671 }
672}
673
674template <typename T>
675void
677{
678 PROFILE("sirius::Density::add_k_point_contribution_rg");
679
680 double omega = unit_cell_.omega();
681
682 auto& fft = ctx_.spfft_coarse<T>();
683
684 /* local number of real-space points */
685 int nr = fft.local_slice_size();
686
687 /* get preallocated memory */
688 sddk::mdarray<T, 2> density_rg(nr, ctx_.num_mag_dims() + 1, get_memory_pool(sddk::memory_t::host), "density_rg");
689 density_rg.zero();
690
691 if (fft.processing_unit() == SPFFT_PU_GPU) {
692 density_rg.allocate(get_memory_pool(sddk::memory_t::device)).zero(sddk::memory_t::device);
693 }
694
695 /* non-magnetic or collinear case */
696 if (ctx_.num_mag_dims() != 3) {
697 /* loop over pure spinor components */
698 for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
699 /* where fft-transformed wave-functions are stored */
700 auto wf_mem = wf_fft__[ispn].on_device() ? sddk::memory_t::device : sddk::memory_t::host;
701 /* local number of bands for a fft-distribution */
702 int nbnd = wf_fft__[ispn].num_wf_local();
703 for (int i = 0; i < nbnd; i++) {
704 auto j = wf_fft__[ispn].spl_num_wf().global_index(i);
705 T w = kp__->band_occupancy(j, ispn) * kp__->weight() / omega;
706
707 auto inp_wf = wf_fft__[ispn].pw_coeffs_spfft(wf_mem, wf::band_index(i));
708
709 add_k_point_contribution_rg_collinear(kp__->spfft_transform(), ispn, w, inp_wf, nr, ctx_.gamma_point(), density_rg);
710 }
711 } // ispn
712 } else { /* non-collinear case */
713 /* allocate on CPU or GPU */
714 sddk::mdarray<std::complex<T>, 1> psi_r_up(nr, get_memory_pool(sddk::memory_t::host));
715 if (fft.processing_unit() == SPFFT_PU_GPU) {
716 psi_r_up.allocate(get_memory_pool(sddk::memory_t::device));
717 }
718
719 RTE_ASSERT(wf_fft__[0].num_wf_local() == wf_fft__[1].num_wf_local());
720
721 int nbnd = wf_fft__[0].num_wf_local();
722 for (int i = 0; i < nbnd; i++) {
723 auto j = wf_fft__[0].spl_num_wf().global_index(i);
724 T w = kp__->band_occupancy(j, 0) * kp__->weight() / omega;
725
726 auto wf_mem_up = wf_fft__[0].on_device() ? sddk::memory_t::device : sddk::memory_t::host;
727 auto wf_mem_dn = wf_fft__[1].on_device() ? sddk::memory_t::device : sddk::memory_t::host;
728
729 /* up- and dn- components */
730 auto inp_wf_up = wf_fft__[0].pw_coeffs_spfft(wf_mem_up, wf::band_index(i));
731 auto inp_wf_dn = wf_fft__[1].pw_coeffs_spfft(wf_mem_dn, wf::band_index(i));
732
733 add_k_point_contribution_rg_noncollinear(kp__->spfft_transform(), w, inp_wf_up, inp_wf_dn, nr, psi_r_up,
734 density_rg);
735 }
736 }
737
738 if (fft.processing_unit() == SPFFT_PU_GPU) {
739 density_rg.copy_to(sddk::memory_t::host);
740 }
741
742 /* switch from real density matrix to density and magnetization */
743 switch (ctx_.num_mag_dims()) {
744 case 3: {
745 #pragma omp parallel for
746 for (int ir = 0; ir < nr; ir++) {
747 rho_mag_coarse_[2]->value(ir) += density_rg(ir, 2); // Mx
748 rho_mag_coarse_[3]->value(ir) += density_rg(ir, 3); // My
749 }
750 }
751 case 1: {
752 #pragma omp parallel for
753 for (int ir = 0; ir < nr; ir++) {
754 rho_mag_coarse_[0]->value(ir) += (density_rg(ir, 0) + density_rg(ir, 1)); // rho
755 rho_mag_coarse_[1]->value(ir) += (density_rg(ir, 0) - density_rg(ir, 1)); // Mz
756 }
757 break;
758 }
759 case 0: {
760 #pragma omp parallel for
761 for (int ir = 0; ir < nr; ir++) {
762 rho_mag_coarse_[0]->value(ir) += density_rg(ir, 0); // rho
763 }
764 }
765 }
766}
767
768template <typename T>
769static void
770add_k_point_contribution_dm_fplapw(Simulation_context const& ctx__, K_point<T> const& kp__,
771 density_matrix_t& density_matrix__)
772{
773 PROFILE("sirius::add_k_point_contribution_dm_fplapw");
774
775 auto& uc = ctx__.unit_cell();
776
777 auto one = la::constant<std::complex<double>>::one();
778
779 /* add |psi_j> n_j <psi_j| to density matrix */
780 #pragma omp parallel
781 {
782 sddk::mdarray<std::complex<double>, 3> wf1(uc.max_mt_basis_size(), ctx__.num_bands(), ctx__.num_spins());
783 sddk::mdarray<std::complex<double>, 3> wf2(uc.max_mt_basis_size(), ctx__.num_bands(), ctx__.num_spins());
784 #pragma omp for
785 for (auto it : kp__.spinor_wave_functions().spl_num_atoms()) {
786 int ia = it.i;
787 int mt_basis_size = uc.atom(ia).type().mt_basis_size();
788
789 for (int ispn = 0; ispn < ctx__.num_spins(); ispn++) {
790 for (int j = 0; j < kp__.num_occupied_bands(ispn); j++) {
791 for (int xi = 0; xi < mt_basis_size; xi++) {
792 auto z = kp__.spinor_wave_functions().mt_coeffs(xi, it.li, wf::spin_index(ispn),
793 wf::band_index(j));
794 wf1(xi, j, ispn) = std::conj(z);
795 wf2(xi, j, ispn) = static_cast<std::complex<double>>(z) * kp__.band_occupancy(j, ispn) *
796 kp__.weight();
797 }
798 }
799 }
800
801 /* compute diagonal terms */
802 for (int ispn = 0; ispn < ctx__.num_spins(); ispn++) {
803 la::wrap(la::lib_t::blas).gemm('N', 'T', mt_basis_size, mt_basis_size,
804 kp__.num_occupied_bands(ispn), &one, &wf1(0, 0, ispn), wf1.ld(), &wf2(0, 0, ispn), wf2.ld(), &one,
805 density_matrix__[ia].at(sddk::memory_t::host, 0, 0, ispn), density_matrix__[ia].ld());
806 }
807 /* offdiagonal term */
808 if (ctx__.num_mag_dims() == 3) {
809 la::wrap(la::lib_t::blas).gemm('N', 'T', mt_basis_size, mt_basis_size, kp__.num_occupied_bands(),
810 &one, &wf1(0, 0, 0), wf1.ld(), &wf2(0, 0, 1), wf2.ld(), &one,
811 density_matrix__[ia].at(sddk::memory_t::host, 0, 0, 2), density_matrix__[ia].ld());
812 }
813 }
814 }
815}
816
817template <typename T, typename F>
818static void
819add_k_point_contribution_dm_pwpp_collinear(Simulation_context& ctx__, K_point<T>& kp__,
820 beta_projectors_coeffs_t<T>& bp_coeffs__, density_matrix_t& density_matrix__)
821{
822 /* number of beta projectors */
823 int nbeta = bp_coeffs__.beta_chunk_.num_beta_;
824 auto mt = ctx__.processing_unit_memory_t();
825
826 for (int ispn = 0; ispn < ctx__.num_spins(); ispn++) {
827 /* total number of occupied bands for this spin */
828 int nbnd = kp__.num_occupied_bands(ispn);
829 /* compute <beta|psi> */
830 auto beta_psi =
831 inner_prod_beta<F>(ctx__.spla_context(), mt, ctx__.host_memory_t(), sddk::is_device_memory(mt),
832 bp_coeffs__, kp__.spinor_wave_functions(), wf::spin_index(ispn), wf::band_range(0, nbnd));
833
834 /* use communicator of the k-point to split band index */
835 splindex_block<> spl_nbnd(nbnd, n_blocks(kp__.comm().size()), block_id(kp__.comm().rank()));
836
837 int nbnd_loc = spl_nbnd.local_size();
838 if (nbnd_loc) { // TODO: this part can also be moved to GPU
839 #pragma omp parallel
840 {
841 /* auxiliary arrays */
842 sddk::mdarray<std::complex<double>, 2> bp1(nbeta, nbnd_loc);
843 sddk::mdarray<std::complex<double>, 2> bp2(nbeta, nbnd_loc);
844 #pragma omp for
845 for (int ia = 0; ia < bp_coeffs__.beta_chunk_.num_atoms_; ia++) {
846 int nbf = bp_coeffs__.beta_chunk_.desc_(beta_desc_idx::nbf, ia);
847 if (!nbf) {
848 continue;
849 }
850 int offs = bp_coeffs__.beta_chunk_.desc_(beta_desc_idx::offset, ia);
851 int ja = bp_coeffs__.beta_chunk_.desc_(beta_desc_idx::ia, ia);
852
853 for (int i = 0; i < nbnd_loc; i++) {
854 /* global index of band */
855 auto j = spl_nbnd.global_index(i);
856
857 for (int xi = 0; xi < nbf; xi++) {
858 bp1(xi, i) = beta_psi(offs + xi, j);
859 bp2(xi, i) = std::conj(bp1(xi, i)) * kp__.weight() * kp__.band_occupancy(j, ispn);
860 }
861 }
862
863 la::wrap(la::lib_t::blas)
864 .gemm('N', 'T', nbf, nbf, nbnd_loc, &la::constant<std::complex<double>>::one(),
865 &bp1(0, 0), bp1.ld(), &bp2(0, 0), bp2.ld(),
866 &la::constant<std::complex<double>>::one(), &density_matrix__[ja](0, 0, ispn),
867 density_matrix__[ja].ld());
868 }
869 }
870 }
871 } // ispn
872}
873
874template <typename T, typename F>
875static void
876add_k_point_contribution_dm_pwpp_noncollinear(Simulation_context& ctx__, K_point<T>& kp__,
877 beta_projectors_coeffs_t<T>& bp_coeffs__, density_matrix_t& density_matrix__)
878{
879 /* number of beta projectors */
880 int nbeta = bp_coeffs__.beta_chunk_.num_beta_;
881
882 /* total number of occupied bands */
883 int nbnd = kp__.num_occupied_bands();
884
885 splindex_block<> spl_nbnd(nbnd, n_blocks(kp__.comm().size()), block_id(kp__.comm().rank()));
886 int nbnd_loc = spl_nbnd.local_size();
887
888 /* auxiliary arrays */
889 sddk::mdarray<std::complex<double>, 3> bp1(nbeta, nbnd_loc, ctx__.num_spins());
890 sddk::mdarray<std::complex<double>, 3> bp2(nbeta, nbnd_loc, ctx__.num_spins());
891
892 auto& uc = ctx__.unit_cell();
893
894 auto mt = ctx__.processing_unit_memory_t();
895 for (int ispn = 0; ispn < ctx__.num_spins(); ispn++) {
896 /* compute <beta|psi> */
897 auto beta_psi =
898 inner_prod_beta<F>(ctx__.spla_context(), mt, ctx__.host_memory_t(), sddk::is_device_memory(mt),
899 bp_coeffs__, kp__.spinor_wave_functions(), wf::spin_index(ispn), wf::band_range(0, nbnd));
900
901 #pragma omp parallel for schedule(static)
902 for (int i = 0; i < nbnd_loc; i++) {
903 auto j = spl_nbnd.global_index(i);
904
905 for (int m = 0; m < nbeta; m++) {
906 bp1(m, i, ispn) = beta_psi(m, j);
907 bp2(m, i, ispn) = std::conj(beta_psi(m, j));
908 bp2(m, i, ispn) *= kp__.weight() * kp__.band_occupancy(j, 0);
909 }
910 }
911 }
912 for (int ia = 0; ia < bp_coeffs__.beta_chunk_.num_atoms_; ia++) {
913 int nbf = bp_coeffs__.beta_chunk_.desc_(beta_desc_idx::nbf, ia);
914 if (!nbf) {
915 continue;
916 }
917 int offs = bp_coeffs__.beta_chunk_.desc_(beta_desc_idx::offset, ia);
918 int ja = bp_coeffs__.beta_chunk_.desc_(beta_desc_idx::ia, ia);
919 if (uc.atom(ja).type().spin_orbit_coupling()) {
920 sddk::mdarray<std::complex<double>, 3> bp3(nbf, nbnd_loc, 2);
921 bp3.zero();
922 /* We already have the <beta|psi> but we need to rotate
923 * them when the spin orbit interaction is included in the
924 * pseudo potential.
925 *
926 * We rotate \f[\langle\beta|\psi\rangle\f] accordingly by multiplying it with
927 * the \f[f^{\sigma\sigma^{'}}_{\xi,\xi^'}\f]
928 */
929
930 for (int xi1 = 0; xi1 < nbf; xi1++) {
931 for (int i = 0; i < nbnd_loc; i++) {
932 for (int xi1p = 0; xi1p < nbf; xi1p++) {
933 if (uc.atom(ja).type().compare_index_beta_functions(xi1, xi1p)) {
934 bp3(xi1, i, 0) +=
935 bp1(offs + xi1p, i, 0) *
936 uc.atom(ja).type().f_coefficients(xi1, xi1p, 0, 0) +
937 bp1(offs + xi1p, i, 1) *
938 uc.atom(ja).type().f_coefficients(xi1, xi1p, 0, 1);
939 bp3(xi1, i, 1) +=
940 bp1(offs + xi1p, i, 0) *
941 uc.atom(ja).type().f_coefficients(xi1, xi1p, 1, 0) +
942 bp1(offs + xi1p, i, 1) *
943 uc.atom(ja).type().f_coefficients(xi1, xi1p, 1, 1);
944 }
945 }
946 }
947 }
948
949 for (int xi1 = 0; xi1 < nbf; xi1++) {
950 for (int i = 0; i < nbnd_loc; i++) {
951 bp1(offs + xi1, i, 0) = bp3(xi1, i, 0);
952 bp1(offs + xi1, i, 1) = bp3(xi1, i, 1);
953 }
954 }
955
956 bp3.zero();
957
958 for (int xi1 = 0; xi1 < nbf; xi1++) {
959 for (int i = 0; i < nbnd_loc; i++) {
960 for (int xi1p = 0; xi1p < nbf; xi1p++) {
961 if (uc.atom(ja).type().compare_index_beta_functions(xi1, xi1p)) {
962 bp3(xi1, i, 0) +=
963 bp2(offs + xi1p, i, 0) *
964 uc.atom(ja).type().f_coefficients(xi1p, xi1, 0, 0) +
965 bp2(offs + xi1p, i, 1) *
966 uc.atom(ja).type().f_coefficients(xi1p, xi1, 1, 0);
967 bp3(xi1, i, 1) +=
968 bp2(offs + xi1p, i, 0) *
969 uc.atom(ja).type().f_coefficients(xi1p, xi1, 0, 1) +
970 bp2(offs + xi1p, i, 1) *
971 uc.atom(ja).type().f_coefficients(xi1p, xi1, 1, 1);
972 }
973 }
974 }
975 }
976
977 for (int xi1 = 0; xi1 < nbf; xi1++) {
978 for (int i = 0; i < nbnd_loc; i++) {
979 bp2(offs + xi1, i, 0) = bp3(xi1, i, 0);
980 bp2(offs + xi1, i, 1) = bp3(xi1, i, 1);
981 }
982 }
983 }
984 }
985
986 if (nbnd_loc) {
987 #pragma omp parallel for
988 for (int ia = 0; ia < bp_coeffs__.beta_chunk_.num_atoms_; ia++) {
989 int nbf = bp_coeffs__.beta_chunk_.desc_(beta_desc_idx::nbf, ia);
990 int offs = bp_coeffs__.beta_chunk_.desc_(beta_desc_idx::offset, ia);
991 int ja = bp_coeffs__.beta_chunk_.desc_(beta_desc_idx::ia, ia);
992 /* compute diagonal spin blocks */
993 for (int ispn = 0; ispn < 2; ispn++) {
994 la::wrap(la::lib_t::blas)
995 .gemm('N', 'T', nbf, nbf, nbnd_loc, &la::constant<std::complex<double>>::one(),
996 &bp1(offs, 0, ispn), bp1.ld(), &bp2(offs, 0, ispn), bp2.ld(),
997 &la::constant<std::complex<double>>::one(), &density_matrix__[ja](0, 0, ispn),
998 density_matrix__[ja].ld());
999 }
1000 /* off-diagonal spin block */
1001 la::wrap(la::lib_t::blas)
1002 .gemm('N', 'T', nbf, nbf, nbnd_loc, &la::constant<std::complex<double>>::one(), &bp1(offs, 0, 0),
1003 bp1.ld(), &bp2(offs, 0, 1), bp2.ld(), &la::constant<std::complex<double>>::one(),
1004 &density_matrix__[ja](0, 0, 2), density_matrix__[ja].ld());
1005 }
1006 }
1007}
1008
1009template <typename T, typename F>
1010static void
1011add_k_point_contribution_dm_pwpp(Simulation_context& ctx__, K_point<T>& kp__, density_matrix_t& density_matrix__)
1012{
1013 PROFILE("sirius::add_k_point_contribution_dm_pwpp");
1014
1015 if (!ctx__.unit_cell().max_mt_basis_size()) {
1016 return;
1017 }
1018
1019 auto bp_gen = kp__.beta_projectors().make_generator();
1020 auto bp_coeffs = bp_gen.prepare();
1021
1022 for (int ichunk = 0; ichunk < kp__.beta_projectors().num_chunks(); ichunk++) {
1023 // kp__.beta_projectors().generate(ctx__.processing_unit_memory_t(), ichunk);
1024 bp_gen.generate(bp_coeffs, ichunk);
1025
1026 if (ctx__.num_mag_dims() != 3) {
1027 add_k_point_contribution_dm_pwpp_collinear<T, F>(ctx__, kp__, bp_coeffs, density_matrix__);
1028 } else {
1029 add_k_point_contribution_dm_pwpp_noncollinear<T, F>(ctx__, kp__, bp_coeffs, density_matrix__);
1030 }
1031 }
1032}
1033
1034void
1035Density::normalize()
1036{
1037 double nel = std::get<0>(rho().integrate());
1038 double scale = unit_cell_.num_electrons() / nel;
1039
1040 /* renormalize interstitial part */
1041 for (int ir = 0; ir < ctx_.spfft<double>().local_slice_size(); ir++) {
1042 rho().rg().value(ir) *= scale;
1043 }
1044 if (ctx_.full_potential()) {
1045 for (int ia = 0; ia < unit_cell_.num_atoms(); ia++) {
1046 for (int ir = 0; ir < unit_cell_.atom(ia).num_mt_points(); ir++) {
1047 for (int lm = 0; lm < ctx_.lmmax_rho(); lm++) {
1048 rho().mt()[ia](lm, ir) *= scale;
1049 }
1050 }
1051 }
1052 }
1053}
1054
1055/// Check total density for the correct number of electrons.
1056bool
1058{
1059 double nel{0};
1060 if (ctx_.full_potential()) {
1061 nel = std::get<0>(rho().integrate());
1062 } else {
1063 nel = rho().rg().f_0().real() * unit_cell_.omega();
1064 }
1065
1066 /* check the number of electrons */
1067 if (std::abs(nel - unit_cell_.num_electrons()) > 1e-5 && ctx_.comm().rank() == 0) {
1068 std::stringstream s;
1069 s << "wrong number of electrons" << std::endl
1070 << " obtained value : " << nel << std::endl
1071 << " target value : " << unit_cell_.num_electrons() << std::endl
1072 << " difference : " << std::abs(nel - unit_cell_.num_electrons()) << std::endl;
1073 if (ctx_.full_potential()) {
1074 s << " total core leakage : " << core_leakage();
1075 for (int ic = 0; ic < unit_cell_.num_atom_symmetry_classes(); ic++) {
1076 s << std::endl << " atom class : " << ic << ", core leakage : " << core_leakage(ic);
1077 }
1078 }
1079 RTE_WARNING(s);
1080 return false;
1081 } else {
1082 return true;
1083 }
1084}
1085
1086template <typename T>
1087void
1088Density::generate(K_point_set const& ks__, bool symmetrize__, bool add_core__, bool transform_to_rg__)
1089{
1090 PROFILE("sirius::Density::generate");
1091
1092 generate_valence<T>(ks__);
1093
1094 if (ctx_.full_potential()) {
1095 if (add_core__) {
1096 /* find the core states */
1098 /* add core contribution */
1099 for (auto it : unit_cell_.spl_num_atoms()) {
1100 for (int ir = 0; ir < unit_cell_.atom(it.i).num_mt_points(); ir++) {
1101 rho().mt()[it.i](0, ir) +=
1102 unit_cell_.atom(it.i).symmetry_class().ae_core_charge_density(ir) / y00;
1103 }
1104 }
1105 }
1106 /* synchronize muffin-tin part */
1107 for (int iv = 0; iv < ctx_.num_mag_dims() + 1; iv++) {
1108 this->component(iv).mt().sync(ctx_.unit_cell().spl_num_atoms());
1109 }
1110 }
1111 if (symmetrize__) {
1112 symmetrize_field4d(*this);
1113 if (ctx_.electronic_structure_method() == electronic_structure_method_t::pseudopotential) {
1114 std::unique_ptr<density_matrix_t> dm_ref;
1115 std::unique_ptr<Occupation_matrix> om_ref;
1116
1117 /* copy density matrix for future comparison */
1118 if (ctx_.cfg().control().verification() >= 1 && ctx_.cfg().parameters().use_ibz() == false) {
1119 dm_ref = std::make_unique<density_matrix_t>(unit_cell_, ctx_.num_mag_comp());
1120 copy(*density_matrix_, *dm_ref);
1121 }
1122 if (ctx_.cfg().control().verification() >= 1 && ctx_.cfg().parameters().use_ibz() == false &&
1124 om_ref = std::make_unique<Occupation_matrix>(ctx_);
1125 copy(*occupation_matrix_, *om_ref);
1126 }
1127
1128 /* symmetrize density matrix (used in standard uspp case) */
1129 if (unit_cell_.max_mt_basis_size() != 0) {
1131 }
1132
1133 if (occupation_matrix_) {
1134 /* all symmetrization is done in the occupation_matrix class */
1135 symmetrize_occupation_matrix(*occupation_matrix_);
1136 }
1137
1138 /* compare with reference density matrix */
1139 if (ctx_.cfg().control().verification() >= 1 && ctx_.cfg().parameters().use_ibz() == false) {
1140 double diff{0};
1141 for (int ia = 0; ia < unit_cell_.num_atoms(); ia++) {
1142 for (size_t i = 0; i < (*density_matrix_)[ia].size(); i++) {
1143 diff = std::max(diff, std::abs((*dm_ref)[ia][i] - (*density_matrix_)[ia][i]));
1144 }
1145 }
1146 std::string status = (diff > 1e-8) ? "Fail" : "OK";
1147 if (ctx_.verbosity() >= 1) {
1148 RTE_OUT(ctx_.out()) << "error of density matrix symmetrization: " << diff << " "
1149 << status << std::endl;
1150 }
1151 }
1152 /* compare with reference occupation matrix */
1153 if (ctx_.cfg().control().verification() >= 1 && ctx_.cfg().parameters().use_ibz() == false &&
1155 double diff1{0};
1156 for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
1157 if (ctx_.unit_cell().atom(ia).type().hubbard_correction()) {
1158 for (size_t i = 0; i < occupation_matrix_->local(ia).size(); i++) {
1159 diff1 = std::max(diff1, std::abs(om_ref->local(ia)[i] - occupation_matrix_->local(ia)[i]));
1160 }
1161 }
1162 }
1163 std::string status = (diff1 > 1e-8) ? "Fail" : "OK";
1164 if (ctx_.verbosity() >= 1) {
1165 RTE_OUT(ctx_.out()) << "error of LDA+U local occupation matrix symmetrization: " << diff1
1166 << " " << status << std::endl;
1167 }
1168
1169 om_ref->update_nonlocal();
1170
1171 double diff2{0};
1172 for (size_t i = 0; i < occupation_matrix_->nonlocal().size(); i++) {
1173 for (size_t j = 0; j < occupation_matrix_->nonlocal(i).size(); j++) {
1174 diff2 = std::max(diff2, std::abs(om_ref->nonlocal(i)[j] - occupation_matrix_->nonlocal(i)[j]));
1175 }
1176 }
1177 status = (diff2 > 1e-8) ? "Fail" : "OK";
1178 if (ctx_.verbosity() >= 1) {
1179 RTE_OUT(ctx_.out()) << "error of LDA+U nonlocal occupation matrix symmetrization: " << diff2
1180 << " " << status << std::endl;
1181 }
1182 }
1183 }
1184 } else { /* if we don't symmetrize, we still need to copy nonlocal part of occupation matrix */
1185 if (occupation_matrix_) {
1186 occupation_matrix_->update_nonlocal();
1187 }
1188 }
1189
1190 if (occupation_matrix_) {
1191 occupation_matrix_->print_occupancies(2);
1192 }
1193
1195
1196 if (transform_to_rg__) {
1197 this->fft_transform(1);
1198 }
1199}
1200
1201template void Density::generate<double>(K_point_set const& ks__, bool symmetrize__, bool add_core__,
1202 bool transform_to_rg__);
1203#if defined(SIRIUS_USE_FP32)
1204template void Density::generate<float>(K_point_set const& ks__, bool symmetrize__, bool add_core__,
1205 bool transform_to_rg__);
1206#endif
1207
1208void
1210{
1211 PROFILE("sirius::Density::augment");
1212
1213 /* check if we need to augment charge density and magnetization */
1214 if (!unit_cell_.augment()) {
1215 return;
1216 }
1217
1218 auto rho_aug = generate_rho_aug();
1219
1220 for (int iv = 0; iv < ctx_.num_mag_dims() + 1; iv++) {
1221 #pragma omp parallel for schedule(static)
1222 for (int igloc = 0; igloc < ctx_.gvec().count(); igloc++) {
1223 this->component(iv).rg().f_pw_local(igloc) += rho_aug(igloc, iv);
1224 }
1225 }
1226}
1227
1228template <typename T>
1229void
1231{
1232 PROFILE("sirius::Density::generate_valence");
1233
1234 /* check weights */
1235 double wt{0};
1236 double occ_val{0};
1237 for (int ik = 0; ik < ks__.num_kpoints(); ik++) {
1238 wt += ks__.get<T>(ik)->weight();
1239 for (int ispn = 0; ispn < ctx_.num_spinors(); ispn++) {
1240 for (int j = 0; j < ctx_.num_bands(); j++) {
1241 occ_val += ks__.get<T>(ik)->weight() * ks__.get<T>(ik)->band_occupancy(j, ispn);
1242 }
1243 }
1244 }
1245
1246 if (std::abs(wt - 1.0) > 1e-12) {
1247 std::stringstream s;
1248 s << "K_point weights don't sum to one" << std::endl << " obtained sum: " << wt;
1249 RTE_THROW(s);
1250 }
1251
1252 if (std::abs(occ_val - unit_cell_.num_valence_electrons() + ctx_.cfg().parameters().extra_charge()) > 1e-8 &&
1253 ctx_.comm().rank() == 0) {
1254 std::stringstream s;
1255 s << "wrong band occupancies" << std::endl
1256 << " computed : " << occ_val << std::endl
1257 << " required : " << unit_cell_.num_valence_electrons() - ctx_.cfg().parameters().extra_charge() << std::endl
1258 << " difference : "
1259 << std::abs(occ_val - unit_cell_.num_valence_electrons() + ctx_.cfg().parameters().extra_charge());
1260 RTE_WARNING(s);
1261 }
1262
1263 density_matrix_->zero();
1264
1265 if (occupation_matrix_) {
1266 occupation_matrix_->zero();
1267 }
1268
1269 /* zero density and magnetization */
1270 zero();
1271 for (int i = 0; i < ctx_.num_mag_dims() + 1; i++) {
1272 rho_mag_coarse_[i]->zero();
1273 }
1274
1275 auto mem = ctx_.processing_unit() == sddk::device_t::CPU ? sddk::memory_t::host : sddk::memory_t::device;
1276
1277 /* start the main loop over k-points */
1278 for (auto it : ks__.spl_num_kpoints()) {
1279 auto kp = ks__.get<T>(it.i);
1280
1281 std::array<wf::Wave_functions_fft<T>, 2> wf_fft;
1282
1283 std::vector<wf::device_memory_guard> mg;
1284
1285 mg.emplace_back(kp->spinor_wave_functions().memory_guard(mem, wf::copy_to::device));
1286 if (ctx_.hubbard_correction()) {
1287 mg.emplace_back(kp->hubbard_wave_functions_S().memory_guard(mem, wf::copy_to::device));
1288 }
1289
1290 for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
1291 int nbnd = kp->num_occupied_bands(ispn);
1292 /* swap wave functions for the FFT transformation */
1293 wf_fft[ispn] = wf::Wave_functions_fft<T>(kp->gkvec_fft_sptr(), kp->spinor_wave_functions(),
1295 }
1296
1297 if (ctx_.full_potential()) {
1298 add_k_point_contribution_dm_fplapw<T>(ctx_, *kp, *density_matrix_);
1299 } else {
1300 if (ctx_.gamma_point() && (ctx_.so_correction() == false)) {
1301 add_k_point_contribution_dm_pwpp<T, T>(ctx_, *kp, *density_matrix_);
1302 } else {
1303 add_k_point_contribution_dm_pwpp<T, std::complex<T>>(ctx_, *kp, *density_matrix_);
1304 }
1305 if (occupation_matrix_) {
1306 occupation_matrix_->add_k_point_contribution(*kp);
1307 }
1308 }
1309
1310 /* add contribution from regular space grid */
1311 add_k_point_contribution_rg(kp, wf_fft);
1312 }
1313
1314 for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
1315 ctx_.comm().allreduce(density_matrix(ia).at(sddk::memory_t::host), static_cast<int>(density_matrix(ia).size()));
1316 }
1317
1318 if (occupation_matrix_) {
1319 occupation_matrix_->reduce();
1320 }
1321
1322 auto& comm = ctx_.gvec_coarse_fft_sptr()->comm_ortho_fft();
1323 for (int j = 0; j < ctx_.num_mag_dims() + 1; j++) {
1324 auto ptr = (ctx_.spfft_coarse<double>().local_slice_size() == 0) ? nullptr : &rho_mag_coarse_[j]->value(0);
1325 /* reduce arrays; assume that each rank did its own fraction of the density */
1326 /* comm_ortho_fft is identical to a product of column communicator inside k-point with k-point communicator */
1327 comm.allreduce(ptr, ctx_.spfft_coarse<double>().local_slice_size());
1328 /* print checksum if needed */
1329 if (env::print_checksum()) {
1330 auto cs = sddk::mdarray<double, 1>(ptr, ctx_.spfft_coarse<double>().local_slice_size()).checksum();
1331 mpi::Communicator(ctx_.spfft_coarse<double>().communicator()).allreduce(&cs, 1);
1332 print_checksum("rho_mag_coarse_rg", cs, ctx_.out());
1333 }
1334 /* transform to PW domain */
1335 rho_mag_coarse_[j]->fft_transform(-1);
1336 /* map to fine G-vector grid */
1337 for (int igloc = 0; igloc < ctx_.gvec_coarse().count(); igloc++) {
1338 component(j).rg().f_pw_local(ctx_.gvec().gvec_base_mapping(igloc)) = rho_mag_coarse_[j]->f_pw_local(igloc);
1339 }
1340 }
1341
1342 if (!ctx_.full_potential()) {
1343 augment();
1344
1345 /* remove extra chanrge */
1346 if (ctx_.gvec().comm().rank() == 0) {
1347 rho().rg().f_pw_local(0) += ctx_.cfg().parameters().extra_charge() / ctx_.unit_cell().omega();
1348 }
1349
1350 if (env::print_hash()) {
1351 auto h = sddk::mdarray<std::complex<double>, 1>(&rho().rg().f_pw_local(0), ctx_.gvec().count()).hash();
1352 print_hash("rho", h, ctx_.out());
1353 }
1354
1355 double nel = rho().rg().f_0().real() * unit_cell_.omega();
1356 /* check the number of electrons */
1357 if (std::abs(nel - unit_cell_.num_electrons()) > 1e-8 && ctx_.comm().rank() == 0) {
1358 std::stringstream s;
1359 s << "wrong unsymmetrized density" << std::endl
1360 << " obtained value : " << std::scientific << nel << std::endl
1361 << " target value : " << std::scientific << unit_cell_.num_electrons() << std::endl
1362 << " difference : " << std::scientific << std::abs(nel - unit_cell_.num_electrons()) << std::endl;
1363 RTE_WARNING(s);
1364 }
1365 }
1366
1367 /* for muffin-tin part */
1368 if (ctx_.full_potential()) {
1370 }
1371}
1372
1375{
1376 PROFILE("sirius::Density::generate_rho_aug");
1377
1378 /* local number of G-vectors */
1379 int gvec_count = ctx_.gvec().count();
1380 auto spl_ngv_loc = split_in_blocks(gvec_count, ctx_.cfg().control().gvec_chunk_size());
1381
1382 auto& mph = get_memory_pool(sddk::memory_t::host);
1383 sddk::memory_pool* mpd{nullptr};
1384
1385 sddk::mdarray<std::complex<double>, 2> rho_aug(gvec_count, ctx_.num_mag_dims() + 1, mph);
1386
1387 switch (ctx_.processing_unit()) {
1388 case sddk::device_t::CPU: {
1389 rho_aug.zero(sddk::memory_t::host);
1390 break;
1391 }
1392 case sddk::device_t::GPU: {
1393 mpd = &get_memory_pool(sddk::memory_t::device);
1394 rho_aug.allocate(*mpd).zero(sddk::memory_t::device);
1395 break;
1396 }
1397 }
1398
1399 /* add contribution to Q(G) from atoms of all types */
1400 for (int iat = 0; iat < unit_cell_.num_atom_types(); iat++) {
1401 auto& atom_type = unit_cell_.atom_type(iat);
1402
1403 if (!atom_type.augment() || atom_type.num_atoms() == 0) {
1404 continue;
1405 }
1406
1407 /* number of beta-projector functions */
1408 int nbf = atom_type.mt_basis_size();
1409 /* number of Q_{xi,xi'} components for each G */
1410 int nqlm = nbf * (nbf + 1) / 2;
1411
1412 /* convert to real matrix */
1413 auto dm = density_matrix_aux(atom_type);
1414
1415 if (env::print_checksum()) {
1416 auto cs = dm.checksum();
1417 print_checksum("density_matrix_aux", cs, ctx_.out());
1418 }
1419
1420 int ndm_pw = (ctx_.processing_unit() == sddk::device_t::CPU) ? 1 : ctx_.num_mag_dims() + 1;
1421 /* treat auxiliary array as real with x2 size */
1422 sddk::mdarray<double, 3> dm_pw(nqlm, spl_ngv_loc[0] * 2, ndm_pw, mph);
1423 sddk::mdarray<double, 2> phase_factors(atom_type.num_atoms(), spl_ngv_loc[0] * 2, mph);
1424
1425 print_memory_usage(ctx_.out(), FILE_LINE);
1426
1427 switch (ctx_.processing_unit()) {
1428 case sddk::device_t::CPU: {
1429 break;
1430 }
1431 case sddk::device_t::GPU: {
1432 phase_factors.allocate(*mpd);
1433 dm_pw.allocate(*mpd);
1434 dm.allocate(*mpd).copy_to(sddk::memory_t::device);
1435 break;
1436 }
1437 }
1438
1439 print_memory_usage(ctx_.out(), FILE_LINE);
1440
1441 auto qpw = (ctx_.processing_unit() == sddk::device_t::CPU) ? sddk::mdarray<double, 2>() :
1442 sddk::mdarray<double, 2>(nqlm, 2 * spl_ngv_loc[0], *mpd, "qpw");
1443
1444 int g_begin{0};
1445 /* loop over blocks of G-vectors */
1446 for (auto ng : spl_ngv_loc) {
1447
1448 /* work on the block of the local G-vectors */
1449 switch (ctx_.processing_unit()) {
1450 case sddk::device_t::CPU: {
1451 /* generate phase factors */
1452 #pragma omp parallel for
1453 for (int g = 0; g < ng; g++) {
1454 int ig = ctx_.gvec().offset() + g_begin + g;
1455 for (int i = 0; i < atom_type.num_atoms(); i++) {
1456 int ia = atom_type.atom_id(i);
1457 auto z = std::conj(ctx_.gvec_phase_factor(ig, ia));
1458 phase_factors(i, 2 * g) = z.real();
1459 phase_factors(i, 2 * g + 1) = z.imag();
1460 }
1461 }
1462 for (int iv = 0; iv < ctx_.num_mag_dims() + 1; iv++) {
1463 PROFILE_START("sirius::Density::generate_rho_aug|gemm");
1464 la::wrap(la::lib_t::blas).gemm('N', 'N', nqlm, 2 * ng, atom_type.num_atoms(),
1466 dm.at(sddk::memory_t::host, 0, 0, iv), dm.ld(),
1467 phase_factors.at(sddk::memory_t::host), phase_factors.ld(),
1469 dm_pw.at(sddk::memory_t::host, 0, 0, 0), dm_pw.ld());
1470 PROFILE_STOP("sirius::Density::generate_rho_aug|gemm");
1471 PROFILE_START("sirius::Density::generate_rho_aug|sum");
1472 #pragma omp parallel for
1473 for (int g = 0; g < ng; g++) {
1474 int igloc = g_begin + g;
1475 std::complex<double> zsum(0, 0);
1476 /* get contribution from non-diagonal terms */
1477 for (int i = 0; i < nqlm; i++) {
1478 std::complex<double> z1(ctx_.augmentation_op(iat).q_pw(i, 2 * igloc),
1479 ctx_.augmentation_op(iat).q_pw(i, 2 * igloc + 1));
1480 std::complex<double> z2(dm_pw(i, 2 * g, 0),
1481 dm_pw(i, 2 * g + 1, 0));
1482
1483 zsum += z1 * z2 * ctx_.augmentation_op(iat).sym_weight(i);
1484 }
1485 /* add contribution from atoms of a given type */
1486 rho_aug(igloc, iv) += zsum;
1487 }
1488 PROFILE_STOP("sirius::Density::generate_rho_aug|sum");
1489 }
1490 break;
1491 }
1492 case sddk::device_t::GPU: {
1493#if defined(SIRIUS_GPU)
1494 acc::copyin(qpw.at(sddk::memory_t::device),
1495 ctx_.augmentation_op(iat).q_pw().at(sddk::memory_t::host, 0, 2 * g_begin), 2 * ng * nqlm);
1496
1497 for (int iv = 0; iv < ctx_.num_mag_dims() + 1; iv++) {
1498 generate_dm_pw_gpu(atom_type.num_atoms(), ng, nbf,
1499 ctx_.unit_cell().atom_coord(iat).at(sddk::memory_t::device),
1500 ctx_.gvec_coord().at(sddk::memory_t::device, g_begin, 0),
1501 ctx_.gvec_coord().at(sddk::memory_t::device, g_begin, 1),
1502 ctx_.gvec_coord().at(sddk::memory_t::device, g_begin, 2),
1503 phase_factors.at(sddk::memory_t::device), dm.at(sddk::memory_t::device, 0, 0, iv),
1504 dm_pw.at(sddk::memory_t::device, 0, 0, iv), 1 + iv);
1505 sum_q_pw_dm_pw_gpu(ng, nbf, qpw.at(sddk::memory_t::device), qpw.ld(),
1506 dm_pw.at(sddk::memory_t::device, 0, 0, iv), dm_pw.ld(),
1507 ctx_.augmentation_op(iat).sym_weight().at(sddk::memory_t::device),
1508 rho_aug.at(sddk::memory_t::device, g_begin, iv), 1 + iv);
1509 }
1510 for (int iv = 0; iv < ctx_.num_mag_dims() + 1; iv++) {
1512 }
1513#endif
1514 break;
1515 }
1516 } // switch (pu)
1517
1518 g_begin += ng;
1519 }
1520 }
1521
1522 if (ctx_.processing_unit() == sddk::device_t::GPU) {
1523 rho_aug.copy_to(sddk::memory_t::host);
1524 }
1525
1526 if (env::print_checksum()) {
1527 auto cs = rho_aug.checksum();
1528 ctx_.comm().allreduce(&cs, 1);
1529 print_checksum("rho_aug", cs, ctx_.out());
1530 }
1531
1532 if (env::print_hash()) {
1533 auto h = rho_aug.hash();
1534 print_hash("rho_aug", h, ctx_.out());
1535 }
1536
1537 return rho_aug;
1538}
1539
1540template <int num_mag_dims>
1541void Density::reduce_density_matrix(Atom_type const& atom_type__, sddk::mdarray<std::complex<double>, 3> const& zdens__,
1542 sddk::mdarray<double, 3>& mt_density_matrix__)
1543{
1544 mt_density_matrix__.zero();
1545
1546 #pragma omp parallel for default(shared)
1547 for (int idxrf2 = 0; idxrf2 < atom_type__.mt_radial_basis_size(); idxrf2++) {
1548 int l2 = atom_type__.indexr(idxrf2).am.l();
1549 for (int idxrf1 = 0; idxrf1 <= idxrf2; idxrf1++) {
1550 int offs = idxrf2 * (idxrf2 + 1) / 2 + idxrf1;
1551 int l1 = atom_type__.indexr(idxrf1).am.l();
1552
1553 int xi2 = atom_type__.indexb().index_of(rf_index(idxrf2));
1554 for (int lm2 = sf::lm(l2, -l2); lm2 <= sf::lm(l2, l2); lm2++, xi2++) {
1555 int xi1 = atom_type__.indexb().index_of(rf_index(idxrf1));
1556 for (int lm1 = sf::lm(l1, -l1); lm1 <= sf::lm(l1, l1); lm1++, xi1++) {
1557 for (int k = 0; k < atom_type__.gaunt_coefs().num_gaunt(lm1, lm2); k++) {
1558 int lm3 = atom_type__.gaunt_coefs().gaunt(lm1, lm2, k).lm3;
1559 auto gc = atom_type__.gaunt_coefs().gaunt(lm1, lm2, k).coef;
1560 switch (num_mag_dims) {
1561 case 3: {
1562 mt_density_matrix__(lm3, offs, 2) += 2.0 * std::real(zdens__(xi1, xi2, 2) * gc);
1563 mt_density_matrix__(lm3, offs, 3) -= 2.0 * std::imag(zdens__(xi1, xi2, 2) * gc);
1564 }
1565 case 1: {
1566 mt_density_matrix__(lm3, offs, 1) += std::real(zdens__(xi1, xi2, 1) * gc);
1567 }
1568 case 0: {
1569 mt_density_matrix__(lm3, offs, 0) += std::real(zdens__(xi1, xi2, 0) * gc);
1570 }
1571 }
1572 }
1573 }
1574 }
1575 }
1576 }
1577}
1578
1579void
1581{
1582 PROFILE("sirius::Density::generate_valence_mt");
1583
1584 /* compute occupation matrix */
1585 if (ctx_.hubbard_correction()) {
1586 RTE_THROW("LDA+U in LAPW is not implemented");
1587
1588 // TODO: fix the way how occupation matrix is calculated
1589
1590 // Timer t3("sirius::Density::generate:om");
1591 //
1592 // mdarray<std::complex<double>, 4> occupation_matrix(16, 16, 2, 2);
1593 //
1594 // for (int ialoc = 0; ialoc < unit_cell_.spl_num_atoms().local_size(); ialoc++)
1595 //{
1596 // int ia = unit_cell_.spl_num_atoms(ialoc);
1597 // Atom_type* type = unit_cell_.atom(ia)->type();
1598 //
1599 // occupation_matrix.zero();
1600 // for (int l = 0; l <= 3; l++)
1601 // {
1602 // int num_rf = type->indexr().num_rf(l);
1603
1604 // for (int j = 0; j < num_zdmat; j++)
1605 // {
1606 // for (int order2 = 0; order2 < num_rf; order2++)
1607 // {
1608 // for (int lm2 = Utils::lm_by_l_m(l, -l); lm2 <= Utils::lm_by_l_m(l, l); lm2++)
1609 // {
1610 // for (int order1 = 0; order1 < num_rf; order1++)
1611 // {
1612 // for (int lm1 = Utils::lm_by_l_m(l, -l); lm1 <= Utils::lm_by_l_m(l, l); lm1++)
1613 // {
1614 // occupation_matrix(lm1, lm2, dmat_spins_[j].first, dmat_spins_[j].second) +=
1615 // mt_complex_density_matrix_loc(type->indexb_by_lm_order(lm1, order1),
1616 // type->indexb_by_lm_order(lm2, order2), j, ialoc) *
1617 // unit_cell_.atom(ia)->symmetry_class()->o_radial_integral(l, order1, order2);
1618 // }
1619 // }
1620 // }
1621 // }
1622 // }
1623 // }
1624 //
1625 // // restore the du block
1626 // for (int lm1 = 0; lm1 < 16; lm1++)
1627 // {
1628 // for (int lm2 = 0; lm2 < 16; lm2++)
1629 // occupation_matrix(lm2, lm1, 1, 0) = conj(occupation_matrix(lm1, lm2, 0, 1));
1630 // }
1631
1632 // unit_cell_.atom(ia)->set_occupation_matrix(&occupation_matrix(0, 0, 0, 0));
1633 //}
1634
1635 // for (int ia = 0; ia < unit_cell_.num_atoms(); ia++)
1636 //{
1637 // int rank = unit_cell_.spl_num_atoms().local_rank(ia);
1638 // unit_cell_.atom(ia)->sync_occupation_matrix(ctx_.comm(), rank);
1639 //}
1640 }
1641
1642 int max_num_rf_pairs = unit_cell_.max_mt_radial_basis_size() * (unit_cell_.max_mt_radial_basis_size() + 1) / 2;
1643
1644 // real density matrix
1645 sddk::mdarray<double, 3> mt_density_matrix(ctx_.lmmax_rho(), max_num_rf_pairs, ctx_.num_mag_dims() + 1);
1646
1647 sddk::mdarray<double, 2> rf_pairs(unit_cell_.max_num_mt_points(), max_num_rf_pairs);
1648 sddk::mdarray<double, 3> dlm(ctx_.lmmax_rho(), unit_cell_.max_num_mt_points(), ctx_.num_mag_dims() + 1);
1649
1650 for (auto it : unit_cell_.spl_num_atoms()) {
1651 auto& atom_type = unit_cell_.atom(it.i).type();
1652
1653 int nmtp = atom_type.num_mt_points();
1654 int num_rf_pairs = atom_type.mt_radial_basis_size() * (atom_type.mt_radial_basis_size() + 1) / 2;
1655
1656 PROFILE_START("sirius::Density::generate|sum_zdens");
1657 switch (ctx_.num_mag_dims()) {
1658 case 3: {
1659 reduce_density_matrix<3>(atom_type, density_matrix(it.i), mt_density_matrix);
1660 break;
1661 }
1662 case 1: {
1663 reduce_density_matrix<1>(atom_type, density_matrix(it.i), mt_density_matrix);
1664 break;
1665 }
1666 case 0: {
1667 reduce_density_matrix<0>(atom_type, density_matrix(it.i), mt_density_matrix);
1668 break;
1669 }
1670 }
1671 PROFILE_STOP("sirius::Density::generate|sum_zdens");
1672
1673 PROFILE("sirius::Density::generate|expand_lm");
1674 /* collect radial functions */
1675 for (int idxrf2 = 0; idxrf2 < atom_type.mt_radial_basis_size(); idxrf2++) {
1676 int offs = idxrf2 * (idxrf2 + 1) / 2;
1677 for (int idxrf1 = 0; idxrf1 <= idxrf2; idxrf1++) {
1678 /* off-diagonal pairs are taken two times: d_{12}*f_1*f_2 + d_{21}*f_2*f_1 = d_{12}*2*f_1*f_2 */
1679 int n = (idxrf1 == idxrf2) ? 1 : 2;
1680 for (int ir = 0; ir < unit_cell_.atom(it.i).num_mt_points(); ir++) {
1681 rf_pairs(ir, offs + idxrf1) = n * unit_cell_.atom(it.i).symmetry_class().radial_function(ir, idxrf1) *
1682 unit_cell_.atom(it.i).symmetry_class().radial_function(ir, idxrf2);
1683 }
1684 }
1685 }
1686 for (int j = 0; j < ctx_.num_mag_dims() + 1; j++) {
1688 .gemm('N', 'T', ctx_.lmmax_rho(), nmtp, num_rf_pairs, &la::constant<double>::one(),
1689 &mt_density_matrix(0, 0, j), mt_density_matrix.ld(), &rf_pairs(0, 0), rf_pairs.ld(),
1690 &la::constant<double>::zero(), &dlm(0, 0, j), dlm.ld());
1691 }
1692
1693 auto sz = ctx_.lmmax_rho() * nmtp;
1694 switch (ctx_.num_mag_dims()) {
1695 case 3: {
1696 /* copy x component */
1697 std::copy(&dlm(0, 0, 2), &dlm(0, 0, 2) + sz, &mag(1).mt()[it.i](0, 0));
1698 /* copy y component */
1699 std::copy(&dlm(0, 0, 3), &dlm(0, 0, 3) + sz, &mag(2).mt()[it.i](0, 0));
1700 }
1701 case 1: {
1702 for (int ir = 0; ir < nmtp; ir++) {
1703 for (int lm = 0; lm < ctx_.lmmax_rho(); lm++) {
1704 rho().mt()[it.i](lm, ir) = dlm(lm, ir, 0) + dlm(lm, ir, 1);
1705 mag(0).mt()[it.i](lm, ir) = dlm(lm, ir, 0) - dlm(lm, ir, 1);
1706 }
1707 }
1708 break;
1709 }
1710 case 0: {
1711 std::copy(&dlm(0, 0, 0), &dlm(0, 0, 0) + sz, &rho().mt()[it.i](0, 0));
1712 }
1713 }
1714 }
1715}
1716
1719{
1720 PROFILE("sirius::Density::compute_atomic_mag_mom");
1721
1723 mmom.zero();
1724
1725 #pragma omp parallel for
1726 for (int ia = 0; ia < unit_cell_.num_atoms(); ia++) {
1727
1728 auto& atom_to_grid_map = ctx_.atoms_to_grid_idx_map(ia);
1729
1730 for (auto coord : atom_to_grid_map) {
1731 int ir = coord.first;
1732 for (int j = 0; j < ctx_.num_mag_dims(); j++) {
1733 mmom(j, ia) += mag(j).rg().value(ir);
1734 }
1735 }
1736
1737 for (int j : {0, 1, 2}) {
1738 mmom(j, ia) *= (unit_cell_.omega() / fft::spfft_grid_size(ctx_.spfft<double>()));
1739 }
1740 }
1741 mpi::Communicator(ctx_.spfft<double>().communicator()).allreduce(&mmom(0, 0), static_cast<int>(mmom.size()));
1742 return mmom;
1743}
1744
1745std::tuple<std::array<double, 3>, std::array<double, 3>, std::vector<std::array<double, 3>>>
1747{
1748 PROFILE("sirius::Density::get_magnetisation");
1749
1750 std::array<double, 3> total_mag({0, 0, 0});
1751 std::vector<std::array<double, 3>> mt_mag(ctx_.unit_cell().num_atoms(), {0, 0, 0});
1752 std::array<double, 3> it_mag({0, 0, 0});
1753
1754 std::vector<int> idx = (ctx_.num_mag_dims() == 1) ? std::vector<int>({2}) : std::vector<int>({2, 0, 1});
1755
1756 for (int j = 0; j < ctx_.num_mag_dims(); j++) {
1757 auto result = this->mag(j).integrate();
1758 total_mag[idx[j]] = std::get<0>(result);
1759 it_mag[idx[j]] = std::get<1>(result);
1760 if (ctx_.full_potential()) {
1761 auto v = std::get<2>(result);
1762 for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
1763 mt_mag[ia][idx[j]] = v[ia];
1764 }
1765 }
1766 }
1767
1768 if (!ctx_.full_potential()) {
1769 auto mmom = this->compute_atomic_mag_mom();
1770 for (int j = 0; j < ctx_.num_mag_dims(); j++) {
1771 for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
1772 mt_mag[ia][idx[j]] = mmom(j, ia);
1773 }
1774 }
1775 }
1776 return std::make_tuple(total_mag, it_mag, mt_mag);
1777}
1778
1781{
1782 auto nbf = ctx_.unit_cell().atom(ia__).type().mt_basis_size();
1783 sddk::mdarray<double, 2> dm(nbf * (nbf + 1) / 2, ctx_.num_mag_dims() + 1);
1784 for (int xi2 = 0; xi2 < nbf; xi2++) {
1785 for (int xi1 = 0; xi1 <= xi2; xi1++) {
1786 auto idx12 = packed_index(xi1, xi2);
1787 switch (ctx_.num_mag_dims()) {
1788 case 3: {
1789 dm(idx12, 2) = 2 * std::real(this->density_matrix(ia__)(xi2, xi1, 2));
1790 dm(idx12, 3) = -2 * std::imag(this->density_matrix(ia__)(xi2, xi1, 2));
1791 }
1792 case 1: {
1793 dm(idx12, 0) =
1794 std::real(this->density_matrix(ia__)(xi2, xi1, 0) + this->density_matrix(ia__)(xi2, xi1, 1));
1795 dm(idx12, 1) =
1796 std::real(this->density_matrix(ia__)(xi2, xi1, 0) - this->density_matrix(ia__)(xi2, xi1, 1));
1797 break;
1798 }
1799 case 0: {
1800 dm(idx12, 0) = this->density_matrix(ia__)(xi2, xi1, 0).real();
1801 break;
1802 }
1803 }
1804 }
1805 }
1806 return dm;
1807}
1808
1809sddk::mdarray<double, 3>
1811{
1812 auto nbf = atom_type__.mt_basis_size();
1813
1814 /* convert to real matrix */
1815 sddk::mdarray<double, 3> dm(nbf * (nbf + 1) / 2, atom_type__.num_atoms(), ctx_.num_mag_dims() + 1);
1816 #pragma omp parallel for
1817 for (int i = 0; i < atom_type__.num_atoms(); i++) {
1818 int ia = atom_type__.atom_id(i);
1819 auto dm1 = this->density_matrix_aux(typename atom_index_t::global(ia));
1820 for (int j = 0; j < ctx_.num_mag_dims() + 1; j++) {
1821 for (int k = 0; k < nbf * (nbf + 1) / 2; k++) {
1822 dm(k, i, j) = dm1(k, j);
1823 }
1824 }
1825 }
1826 return dm;
1827}
1828
1829void
1831{
1832 auto func_prop = mixer::periodic_function_property();
1833 auto func_prop1 = mixer::periodic_function_property_modified(true);
1834 auto density_prop = mixer::density_function_property();
1835 auto paw_prop = mixer::paw_density_function_property();
1836 auto hubbard_prop = mixer::hubbard_matrix_function_property();
1837
1838 /* create mixer */
1839 this->mixer_ =
1840 mixer::Mixer_factory<Periodic_function<double>, Periodic_function<double>, Periodic_function<double>,
1842 PAW_density<double>, Hubbard_matrix>(mixer_cfg__);
1843
1844 if (ctx_.full_potential()) {
1845 this->mixer_->initialize_function<0>(func_prop, component(0), ctx_, [&](int ia){return lmax_t(ctx_.lmax_rho());});
1846 if (ctx_.num_mag_dims() > 0) {
1847 this->mixer_->initialize_function<1>(func_prop, component(1), ctx_, [&](int ia){return lmax_t(ctx_.lmax_rho());});
1848 }
1849 if (ctx_.num_mag_dims() > 1) {
1850 this->mixer_->initialize_function<2>(func_prop, component(2), ctx_, [&](int ia){return lmax_t(ctx_.lmax_rho());});
1851 this->mixer_->initialize_function<3>(func_prop, component(3), ctx_, [&](int ia){return lmax_t(ctx_.lmax_rho());});
1852 }
1853 } else {
1854 /* initialize functions */
1855 if (mixer_cfg__.use_hartree()) {
1856 this->mixer_->initialize_function<0>(func_prop1, component(0), ctx_);
1857 } else {
1858 this->mixer_->initialize_function<0>(func_prop, component(0), ctx_);
1859 }
1860 if (ctx_.num_mag_dims() > 0) {
1861 this->mixer_->initialize_function<1>(func_prop, component(1), ctx_);
1862 }
1863 if (ctx_.num_mag_dims() > 1) {
1864 this->mixer_->initialize_function<2>(func_prop, component(2), ctx_);
1865 this->mixer_->initialize_function<3>(func_prop, component(3), ctx_);
1866 }
1867 }
1868
1869 this->mixer_->initialize_function<4>(density_prop, *density_matrix_, unit_cell_, ctx_.num_mag_comp());
1870
1871 if (ctx_.unit_cell().num_paw_atoms()) {
1872 this->mixer_->initialize_function<5>(paw_prop, *paw_density_, unit_cell_);
1873 }
1874 if (occupation_matrix_) {
1875 this->mixer_->initialize_function<6>(hubbard_prop, *occupation_matrix_, ctx_);
1876 }
1877}
1878
1879void
1880Density::mixer_input()
1881{
1882 PROFILE("sirius::Density::mixer_input");
1883
1884 mixer_->set_input<0>(component(0));
1885 if (ctx_.num_mag_dims() > 0) {
1886 mixer_->set_input<1>(component(1));
1887 }
1888 if (ctx_.num_mag_dims() > 1) {
1889 mixer_->set_input<2>(component(2));
1890 mixer_->set_input<3>(component(3));
1891 }
1892
1893 mixer_->set_input<4>(*density_matrix_);
1894
1895 if (ctx_.unit_cell().num_paw_atoms()) {
1896 mixer_->set_input<5>(*paw_density_);
1897 }
1898
1899 if (occupation_matrix_) {
1900 mixer_->set_input<6>(*occupation_matrix_);
1901 }
1902}
1903
1904void
1905Density::mixer_output()
1906{
1907 PROFILE("sirius::Density::mixer_output");
1908
1909 mixer_->get_output<0>(component(0));
1910 if (ctx_.num_mag_dims() > 0) {
1911 mixer_->get_output<1>(component(1));
1912 }
1913 if (ctx_.num_mag_dims() > 1) {
1914 mixer_->get_output<2>(component(2));
1915 mixer_->get_output<3>(component(3));
1916 }
1917
1918 mixer_->get_output<4>(*density_matrix_);
1919
1920 if (ctx_.unit_cell().num_paw_atoms()) {
1921 mixer_->get_output<5>(*paw_density_);
1922 }
1923
1924 if (occupation_matrix_) {
1925 mixer_->get_output<6>(*occupation_matrix_);
1926 }
1927
1928 /* transform mixed density to plane-wave domain */
1929 this->fft_transform(-1);
1930}
1931
1932double
1934{
1935 PROFILE("sirius::Density::mix");
1936
1937 mixer_input();
1938 double rms = mixer_->mix(ctx_.cfg().settings().mixer_rms_min());
1939 mixer_output();
1940
1941 return rms;
1942}
1943
1944void Density::print_info(std::ostream& out__) const
1945{
1946 auto result = this->rho().integrate();
1947
1948 auto total_charge = std::get<0>(result);
1949 auto it_charge = std::get<1>(result);
1950 auto mt_charge = std::get<2>(result);
1951
1952 auto result_mag = this->get_magnetisation();
1953 auto total_mag = std::get<0>(result_mag);
1954 auto it_mag = std::get<1>(result_mag);
1955 auto mt_mag = std::get<2>(result_mag);
1956
1957 auto write_vector = [&](r3::vector<double> v__) {
1958 out__ << "[" << std::setw(9) << std::setprecision(5) << std::fixed << v__[0] << ", " << std::setw(9)
1959 << std::setprecision(5) << std::fixed << v__[1] << ", " << std::setw(9) << std::setprecision(5)
1960 << std::fixed << v__[2] << "]";
1961 };
1962
1963 out__ << "Charges and magnetic moments" << std::endl
1964 << hbar(80, '-') << std::endl;
1965 if (ctx_.full_potential()) {
1966 double total_core_leakage{0.0};
1967 out__ << "atom charge core leakage";
1968 if (ctx_.num_mag_dims()) {
1969 out__ << " moment |moment|";
1970 }
1971 out__ << std::endl << hbar(80, '-') << std::endl;
1972
1973 for (int ia = 0; ia < unit_cell_.num_atoms(); ia++) {
1974 double core_leakage = unit_cell_.atom(ia).symmetry_class().core_leakage();
1975 total_core_leakage += core_leakage;
1976 out__ << std::setw(4) << ia << std::setw(12) << std::setprecision(6) << std::fixed << mt_charge[ia]
1977 << std::setw(16) << std::setprecision(6) << std::scientific << core_leakage;
1978 if (ctx_.num_mag_dims()) {
1979 r3::vector<double> v(mt_mag[ia]);
1980 out__ << " ";
1981 write_vector(v);
1982 out__ << std::setw(12) << std::setprecision(6) << std::fixed << v.length();
1983 }
1984 out__ << std::endl;
1985 }
1986 out__ << std::endl;
1987 out__ << "total core leakage : " << std::setprecision(8) << std::scientific << total_core_leakage
1988 << std::endl
1989 << "interstitial charge : " << std::setprecision(6) << std::fixed << it_charge << std::endl;
1990 if (ctx_.num_mag_dims()) {
1991 r3::vector<double> v(it_mag);
1992 out__ << "interstitial moment : ";
1993 write_vector(v);
1994 out__ << ", magnitude : " << std::setprecision(6) << std::fixed << v.length() << std::endl;
1995 }
1996 } else {
1997 if (ctx_.num_mag_dims()) {
1998 out__ << "atom moment |moment|" << std::endl
1999 << hbar(80, '-') << std::endl;
2000
2001 for (int ia = 0; ia < unit_cell_.num_atoms(); ia++) {
2002 r3::vector<double> v(mt_mag[ia]);
2003 out__ << std::setw(4) << ia << " ";
2004 write_vector(v);
2005 out__ << std::setw(12) << std::setprecision(6) << std::fixed << v.length() << std::endl;
2006 }
2007 out__ << std::endl;
2008 }
2009 }
2010 out__ << "total charge : " << std::setprecision(6) << std::fixed << total_charge << std::endl;
2011
2012 if (ctx_.num_mag_dims()) {
2013 r3::vector<double> v(total_mag);
2014 out__ << "total moment : ";
2015 write_vector(v);
2016 out__ << ", magnitude : " << std::setprecision(6) << std::fixed << v.length() << std::endl;
2017 }
2018
2019 /*
2020 * DEBUG: compute magnetic moments analytically
2021 */
2022 //auto Rmt = ctx_.unit_cell().find_mt_radii(1, true);
2023
2024 //for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
2025 // double mom{0};
2026 // for (int ig = 0; ig < ctx_.gvec().num_gvec(); ig++) {
2027 // auto ff = sirius::unit_step_function_form_factors(Rmt[ctx_.unit_cell().atom(ia).type_id()], ctx_.gvec().gvec_len(ig));
2028 // mom += (ctx_.gvec_phase_factor(ctx_.gvec().gvec(ig), ia) * ff * this->magnetization(0).f_pw_local(ig)).real();
2029 // }
2030 // mom *= fourpi;
2031 // if (ctx_.gvec().reduced()) {
2032 // mom *= 2;
2033 // }
2034 // out__ << "ia="<<ia<<" mom="<<mom<<std::endl;
2035 //}
2036}
2037
2038} // namespace sirius
Contains declaration and implementation of sirius::Beta_projectors_base class.
int num_atoms() const
Return number of atoms belonging to the current symmetry class.
double radial_function(int ir, int idx) const
Get a value of the radial functions.
Defines the properties of atom type.
Definition: atom_type.hpp:93
int mt_basis_size() const
Total number of muffin-tin basis functions (APW + LO).
Definition: atom_type.hpp:880
int atom_id(int idx) const
Return atom ID (global index) by the index of atom within a given type.
Definition: atom_type.hpp:940
int num_mt_points() const
Return number of muffin-tin radial grid points.
Definition: atom_type.hpp:718
auto const & indexr() const
Return const reference to the index of radial functions.
Definition: atom_type.hpp:817
void init_free_atom_density(bool smooth)
Initialize the free atom density (smooth or true).
Definition: atom_type.cpp:241
double free_atom_density(const int idx) const
Get free atom density at i-th point of radial grid.
Definition: atom_type.hpp:766
int mt_radial_basis_size() const
Total number of radial basis functions.
Definition: atom_type.hpp:886
int num_atoms() const
Return number of atoms of a given type.
Definition: atom_type.hpp:934
int type_id() const
Return atom type id.
Definition: atom.hpp:336
Atom_symmetry_class & symmetry_class()
Return reference to corresponding atom symmetry class.
Definition: atom.hpp:324
Atom_type const & type() const
Return const reference to corresponding atom type object.
Definition: atom.hpp:318
auto vector_field() const
Return vector field.
Definition: atom.hpp:354
sddk::mdarray< double, 2 > compute_atomic_mag_mom() const
Calculate approximate atomic magnetic moments in case of PP-PW.
Definition: density.cpp:1718
std::unique_ptr< mixer::Mixer< Periodic_function< double >, Periodic_function< double >, Periodic_function< double >, Periodic_function< double >, density_matrix_t, PAW_density< double >, Hubbard_matrix > > mixer_
Density mixer.
Definition: density.hpp:252
double core_leakage() const
Find the total leakage of the core states out of the muffin-tins.
Definition: density.cpp:138
void generate_paw_density()
Generate and in lm components.
Definition: density.cpp:552
Unit_cell & unit_cell_
Alias to ctx_.unit_cell()
Definition: density.hpp:217
void mixer_init(config_t::mixer_t const &mixer_cfg__)
Initialize density mixer.
Definition: density.cpp:1830
void update()
Update internal parameters after a change of lattice vectors or atomic coordinates.
Definition: density.cpp:120
std::unique_ptr< density_matrix_t > density_matrix_
Density matrix for all atoms.
Definition: density.hpp:221
void generate_core_charge_density()
Generate charge density of core states.
Definition: density.hpp:311
std::unique_ptr< PAW_density< double > > paw_density_
Local fraction of atoms with PAW correction.
Definition: density.hpp:224
sddk::mdarray< double, 3 > density_matrix_aux(Atom_type const &atom_type__) const
Return density matrix for all atoms of a given type in auxiliary form.
Definition: density.cpp:1810
void augment()
Add augmentation charge Q(r).
Definition: density.cpp:1209
bool check_num_electrons() const
Check total density for the correct number of electrons.
Definition: density.cpp:1057
void reduce_density_matrix(Atom_type const &atom_type__, sddk::mdarray< std::complex< double >, 3 > const &zdens__, sddk::mdarray< double, 3 > &mt_density_matrix__)
Reduce complex density matrix over magnetic quantum numbers.
Definition: density.cpp:1541
Density(Simulation_context &ctx__)
Constructor.
Definition: density.cpp:81
sddk::mdarray< std::complex< double >, 2 > generate_rho_aug() const
Generate augmentation charge density.
Definition: density.cpp:1374
double mix()
Mix new density.
Definition: density.cpp:1933
std::tuple< std::array< double, 3 >, std::array< double, 3 >, std::vector< std::array< double, 3 > > > get_magnetisation() const
Get total magnetization and also contributions from interstitial and muffin-tin parts.
Definition: density.cpp:1746
auto const & rho() const
Return const reference to charge density (scalar functions).
Definition: density.hpp:416
void initial_density()
Generate initial charge density and magnetization.
Definition: density.cpp:148
void generate_valence_mt()
Generate valence density in the muffin-tins.
Definition: density.cpp:1580
std::unique_ptr< Smooth_periodic_function< double > > rho_pseudo_core_
Pointer to pseudo core charge density.
Definition: density.hpp:242
std::unique_ptr< Occupation_matrix > occupation_matrix_
Occupation matrix of the LDA+U method.
Definition: density.hpp:227
void add_k_point_contribution_rg(K_point< T > *kp__, std::array< wf::Wave_functions_fft< T >, 2 > &wf_fft__)
Add k-point contribution to the density and magnetization defined on the regular FFT grid.
Definition: density.cpp:676
std::array< std::unique_ptr< Smooth_periodic_function< double > >, 4 > rho_mag_coarse_
Density and magnetization on the coarse FFT mesh.
Definition: density.hpp:234
void generate_valence(K_point_set const &ks__)
Generate valence charge density and magnetization from the wave functions.
Definition: density.cpp:1230
void init_density_matrix_for_paw()
Initialize PAW density matrix.
Definition: density.cpp:436
void generate(K_point_set const &ks__, bool symmetrize__, bool add_core__, bool transform_to_rg__)
Generate full charge density (valence + core) and magnetization from the wave functions.
Definition: density.cpp:1088
Four-component function consisting of scalar and vector parts.
Definition: field4d.hpp:40
Compact storage of non-zero Gaunt coefficients .
Definition: gaunt.hpp:58
int num_gaunt(int lm3) const
Return number of non-zero Gaunt coefficients for a given lm3.
Definition: gaunt.hpp:126
gaunt_L1_L2< T > const & gaunt(int lm3, int idx) const
Return a structure containing {lm1, lm2, coef} for a given lm3 and index.
Definition: gaunt.hpp:147
Describes Hubbard orbital occupancy or potential correction matrices.
Set of k-points.
Definition: k_point_set.hpp:41
int num_kpoints() const
Return total number of k-points.
K-point related variables and methods.
Definition: k_point.hpp:44
double band_occupancy(int j__, int ispn__) const
Get band occupancy.
Definition: k_point.hpp:434
double weight() const
Return weight of k-point.
Definition: k_point.hpp:456
int num_occupied_bands(int ispn__=-1) const
Get the number of occupied bands for each spin channel.
Definition: k_point.hpp:399
PAW density storage.
Definition: density.hpp:104
Representation of the periodical function on the muffin-tin geometry.
static void convert(int lmax__, double const *f_rlm__, std::complex< double > *f_ylm__)
Convert form Rlm to Ylm representation.
Definition: sht.hpp:264
static double gaunt_rrr(int l1, int l2, int l3, int m1, int m2, int m3)
Gaunt coefficent of three real spherical harmonics.
Definition: sht.hpp:423
Simulation context is a set of parameters and objects describing a single simulation.
auto gvec_phase_factor(r3::vector< int > G__, int ia__) const
Phase factors .
auto const & gvec() const
Return const reference to Gvec object.
auto const & augmentation_op(int iat__) const
Returns a constant pointer to the augmentation operator of a given atom type.
std::ostream & out() const
Return output stream.
mpi::Communicator const & comm() const
Total communicator of the simulation.
int num_spins() const
Number of spin components.
bool gamma_point(bool gamma_point__)
Set flag for Gamma-point calculation.
int num_spinors() const
Number of spinor wave-functions labeled by a sinlge band index.
int num_mag_comp() const
Number of components in the complex density matrix.
int num_bands(int num_bands__)
Set the number of bands.
int num_mag_dims() const
Number of dimensions in the magnetization vector.
double pw_cutoff() const
Plane-wave cutoff for G-vectors (in 1/[a.u.]).
bool use_symmetry() const
Get a use_symmetry flag.
double omega() const
Unit cell volume.
Definition: unit_cell.hpp:275
int max_num_mt_points() const
Maximum number of muffin-tin points among all atom types.
Definition: unit_cell.hpp:397
Atom const & atom(int id__) const
Return const atom instance by id.
Definition: unit_cell.hpp:344
int num_paw_atoms() const
Return number of atoms with PAW pseudopotential.
Definition: unit_cell.hpp:161
int num_atom_symmetry_classes() const
Number of atom symmetry classes.
Definition: unit_cell.hpp:320
bool augment() const
Return 'True' if at least one atom in the unit cell has an augmentation charge.
Definition: unit_cell.hpp:597
auto const & spl_num_paw_atoms() const
Get split index of PAW atoms.
Definition: unit_cell.hpp:172
int num_atom_types() const
Number of atom types.
Definition: unit_cell.hpp:281
int max_mt_basis_size() const
Maximum number of basis functions among all atom types.
Definition: unit_cell.hpp:430
Atom_type & atom_type(int id__)
Return atom type instance by id.
Definition: unit_cell.hpp:288
int max_mt_radial_basis_size() const
Maximum number of radial functions among all atom types.
Definition: unit_cell.hpp:440
int num_atoms() const
Number of atoms in the unit cell.
Definition: unit_cell.hpp:338
std::vector< double > find_mt_radii(int auto_rmt__, bool inflate__)
Automatically determine new muffin-tin radii as a half distance between neighbor atoms.
Definition: unit_cell.cpp:41
double num_valence_electrons() const
Number of valence electrons.
Definition: unit_cell.hpp:375
double num_electrons() const
Total number of electrons (core + valence).
Definition: unit_cell.hpp:369
Atom_symmetry_class const & atom_symmetry_class(int id__) const
Return const symmetry class instance by class id.
Definition: unit_cell.hpp:326
auto paw_atom_index(paw_atom_index_t::global ipaw__) const
Return global index of atom by the index in the list of PAW atoms.
Definition: unit_cell.hpp:183
Helper class to wrap stream id (integer number).
Definition: acc.hpp:132
Parameters of the mixer.
Definition: config.hpp:16
auto use_hartree() const
Use Hartree potential in the inner() product for residuals.
Definition: config.hpp:95
void gemm(char transa, char transb, ftn_int m, ftn_int n, ftn_int k, T const *alpha, T const *A, ftn_int lda, T const *B, ftn_int ldb, T const *beta, T *C, ftn_int ldc, acc::stream_id sid=acc::stream_id(-1)) const
General matrix-matrix multiplication.
MPI communicator wrapper.
void allreduce(T *buffer__, int count__) const
Perform the in-place (the output buffer is used as the input buffer) all-to-all reduction.
int rank() const
Rank of MPI process inside communicator.
Multidimensional array with the column-major (Fortran) order.
Definition: memory.hpp:660
uint64_t hash(uint64_t h__=5381) const
Compute hash of the array.
Definition: memory.hpp:1242
void copy_to(memory_t mem__, size_t idx0__, size_t n__, acc::stream_id sid=acc::stream_id(-1))
Copy n elements starting from idx0 from one memory type to another.
Definition: memory.hpp:1339
void zero(memory_t mem__, size_t idx0__, size_t n__)
Zero n elements starting from idx0.
Definition: memory.hpp:1316
uint32_t ld() const
Return leading dimension size.
Definition: memory.hpp:1233
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
Definition: memory.hpp:1057
T checksum(size_t idx0__, size_t size__) const
Compute checksum.
Definition: memory.hpp:1262
size_t size() const
Return total size (number of elements) of the array.
Definition: memory.hpp:1207
Wave-fucntions in the FFT-friendly distribution.
Describe a range of bands.
Contains definition and partial implementation of sirius::Density class.
Generate complex spherical harmonics for the local set of G-vectors.
Generate spherical Bessel functions at the muffin-tin boundary for the local set of G-vectors.
Contains the mixer facttory for creating different types of mixers.
Contains declarations of functions required for mixing.
@ value
the parser finished reading a JSON value
void copyin(T *target__, T const *source__, size_t n__)
Copy memory from host to device.
Definition: acc.hpp:337
void sync_stream(stream_id sid__)
Synchronize a single stream.
Definition: acc.hpp:234
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
Definition: acc.hpp:320
size_t spfft_grid_size(T const &spfft__)
Total size of the SpFFT transformation grid.
Definition: fft.hpp:221
FunctionProperties< Periodic_function< double > > periodic_function_property_modified(bool use_coarse_gvec__)
Only for the PP-PW case.
int lmax(int lmmax__)
Get maximum orbital quantum number by the maximum lm index.
Definition: specfunc.hpp:56
int lmmax(int lmax)
Maximum number of combinations for a given .
Definition: specfunc.hpp:44
int lm(int l, int m)
Get composite lm index by angular index l and azimuthal index m.
Definition: specfunc.hpp:50
std::vector< int > l_by_lm(int lmax__)
Get array of orbital quantum numbers for each lm component.
Definition: specfunc.hpp:69
Namespace of the SIRIUS library.
Definition: sirius.f90:5
static void add_k_point_contribution_rg_noncollinear(fft::spfft_transform_type< T > &fft__, T w__, T const *inp_wf_up__, T const *inp_wf_dn__, int nr__, sddk::mdarray< std::complex< T >, 1 > &psi_r_up__, sddk::mdarray< T, 2 > &density_rg__)
Compute contribution to density and megnetisation from the 2-component spinor wave-functions.
Definition: density.cpp:612
auto split_in_blocks(int length__, int block_size__)
Split the 'length' elements into blocks with the initial block size.
Definition: splindex.hpp:43
@ pseudopotential
Pseudopotential (ultrasoft, norm-conserving, PAW).
auto hash(void const *buff, size_t size, uint64_t h=5381)
Simple hash function.
Definition: math_tools.hpp:95
const double y00
First spherical harmonic .
Definition: constants.hpp:51
strong_type< int, struct __block_id_tag > block_id
ID of the block.
Definition: splindex.hpp:108
strong_type< int, struct __rf_index_tag > rf_index
Radial function index.
static void add_k_point_contribution_rg_collinear(fft::spfft_transform_type< T > &fft__, int ispn__, T w__, T const *inp_wf__, int nr__, bool gamma__, sddk::mdarray< T, 2 > &density_rg__)
Compute non-magnetic or up- or dn- contribution of the wave-functions to the charge density.
Definition: density.cpp:569
@ nm
Non-magnetic case.
const double fourpi
Definition: constants.hpp:48
strong_type< int, struct __n_blocks_tag > n_blocks
Number of blocks to which the global index is split.
Definition: splindex.hpp:105
auto generate_sbessel_mt(Simulation_context const &ctx__, int lmax__)
Compute values of spherical Bessel functions at MT boundary.
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
Definition: math_tools.hpp:165
auto sum_fg_fl_yg(Simulation_context const &ctx__, int lmax__, std::complex< double > const *fpw__, sddk::mdarray< double, 3 > &fl__, sddk::matrix< std::complex< double > > &gvec_ylm__)
void symmetrize_density_matrix(Unit_cell const &uc__, density_matrix_t &dm__, int num_mag_comp__)
Symmetrize density matrix.
auto generate_gvec_ylm(Simulation_context const &ctx__, int lmax__)
Generate complex spherical harmonics for the local set of G-vectors.
int packed_index(int i__, int j__)
Pack two indices into one for symmetric matrices.
A time-based profiler.
Serialize madarray to json.
static const int ia
Global index of atom.
static const int nbf
Number of beta-projector functions for this atom.
static const int offset
Offset of beta-projectors in this chunk.
static const unsigned int fft_layout
Shuffle to FFT distribution.
LAPW specific function to compute sum over plane-wave coefficients and spherical harmonics.
Symmetrize density matrix of LAPW and PW methods.
Symmetrize density and potential fields (scalar + vector).
Symmetrize occupation matrix of the LDA+U method.