41#if defined(SIRIUS_GPU)
43update_density_rg_1_real_gpu(
int size__,
float const* psi_rg__,
float wt__,
float* density_rg__)
45 update_density_rg_1_real_gpu_float(size__, psi_rg__, wt__, density_rg__);
49update_density_rg_1_real_gpu(
int size__,
double const* psi_rg__,
double wt__,
double* density_rg__)
51 update_density_rg_1_real_gpu_double(size__, psi_rg__, wt__, density_rg__);
55update_density_rg_1_complex_gpu(
int size__, std::complex<float>
const* psi_rg__,
float wt__,
float* density_rg__)
57 update_density_rg_1_complex_gpu_float(size__, psi_rg__, wt__, density_rg__);
61update_density_rg_1_complex_gpu(
int size__, std::complex<double>
const* psi_rg__,
double wt__,
double* density_rg__)
63 update_density_rg_1_complex_gpu_double(size__, psi_rg__, wt__, density_rg__);
67update_density_rg_2_gpu(
int size__, std::complex<float>
const* psi_rg_up__, std::complex<float>
const* psi_rg_dn__,
68 float wt__,
float* density_x_rg__,
float* density_y_rg__)
70 update_density_rg_2_gpu_float(size__, psi_rg_up__, psi_rg_dn__, wt__, density_x_rg__, density_y_rg__);
74update_density_rg_2_gpu(
int size__, std::complex<double>
const* psi_rg_up__, std::complex<double>
const* psi_rg_dn__,
75 double wt__,
double* density_x_rg__,
double* density_y_rg__)
77 update_density_rg_2_gpu_double(size__, psi_rg_up__, psi_rg_dn__, wt__, density_x_rg__, density_y_rg__);
82 :
Field4D(ctx__,
lmax_t(ctx__.lmax_rho()), {ctx__.periodic_function_ptr(
"rho"), ctx__.periodic_function_ptr(
"magz"),
83 ctx__.periodic_function_ptr(
"magx"), ctx__.periodic_function_ptr(
"magy")})
84 , unit_cell_(ctx_.unit_cell())
86 PROFILE(
"sirius::Density");
88 if (!ctx_.initialized()) {
89 RTE_THROW(
"Simulation_context is not initialized");
92 using spf = Smooth_periodic_function<double>;
95 for (
int i = 0; i < ctx_.num_mag_dims() + 1; i++) {
96 rho_mag_coarse_[i] = std::make_unique<spf>(ctx_.spfft_coarse<
double>(), ctx_.gvec_coarse_fft_sptr());
100 if (!ctx_.full_potential()) {
101 rho_pseudo_core_ = std::make_unique<spf>(ctx_.spfft<
double>(), ctx_.gvec_fft_sptr());
106 density_matrix_ = std::make_unique<density_matrix_t>(ctx_.unit_cell(), ctx_.num_mag_comp());
108 if (ctx_.hubbard_correction()) {
109 occupation_matrix_ = std::make_unique<Occupation_matrix>(ctx_);
112 if (unit_cell_.num_paw_atoms()) {
113 paw_density_ = std::make_unique<PAW_density<double>>(unit_cell_);
122 PROFILE(
"sirius::Density::update");
124 if (!ctx_.full_potential()) {
131 generate_pseudo_core_charge_density();
150 PROFILE(
"sirius::Density::initial_density");
154 if (ctx_.full_potential()) {
155 initial_density_full_pot();
157 initial_density_pseudo();
168 symmetrize_field4d(*
this);
173Density::initial_density_pseudo()
176 auto q = ctx_.
gvec().shells_len();
179 auto const ff = ctx_.ri().ps_rho_->values(q, ctx_.
comm());
181 auto v = make_periodic_function<index_domain_t::local>(ctx_.unit_cell(), ctx_.
gvec(),
182 ctx_.phase_factors_t(), ff);
184 if (env::print_checksum()) {
187 print_checksum(
"rho_pw_init", z1, ctx_.
out());
189 std::copy(v.begin(), v.end(), &
rho().rg().f_pw_local(0));
191 if (env::print_hash()) {
192 auto h = sddk::mdarray<std::complex<double>, 1>(&v[0], ctx_.
gvec().count()).
hash();
193 print_hash(
"rho_pw_init", h, ctx_.
out());
200 s <<
"wrong initial charge density" << std::endl
201 <<
" integral of the density : " << std::setprecision(12) << charge << std::endl
207 rho().rg().fft_transform(1);
208 if (env::print_hash()) {
209 auto h =
rho().rg().values().hash();
210 print_hash(
"rho_rg_init", h, ctx_.
out());
214 for (
int ir = 0; ir < ctx_.spfft<
double>().local_slice_size(); ir++) {
215 rho().rg().value(ir) = std::max(
rho().rg().
value(ir), 0.0);
220 if (env::print_checksum()) {
221 auto cs =
rho().rg().checksum_rg();
222 print_checksum(
"rho_rg_init", cs, ctx_.
out());
230 auto w = [](
double R,
double x) {
231 double norm = 3.1886583903476735 * std::pow(R, 3);
233 return (1 - std::pow(x / R, 2)) * std::exp(x / R) / norm;
237 auto& atom_to_grid_map = ctx_.atoms_to_grid_idx_map(ia);
241 for (
auto coord : atom_to_grid_map) {
242 int ir = coord.first;
243 double r = coord.second;
245 mag(0).rg().value(ir) += v[2] * f;
247 mag(1).rg().value(ir) += v[0] * f;
248 mag(2).rg().value(ir) += v[1] * f;
253 this->fft_transform(-1);
255 if (env::print_checksum()) {
257 auto cs = component(i).rg().checksum_rg();
258 auto cs1 = component(i).rg().checksum_pw();
260 s <<
"component[" << i <<
"]_rg";
261 print_checksum(s.str(), cs, ctx_.
out());
262 std::stringstream s1;
263 s1 <<
"component[" << i <<
"]_pw";
264 print_checksum(s1.str(), cs1, ctx_.
out());
270Density::initial_density_full_pot()
278 Radial_integrals_rho_free_atom ri(ctx_.unit_cell(), ctx_.
pw_cutoff(), 40);
281 auto v = make_periodic_function<index_domain_t::local>(ctx_.unit_cell(), ctx_.
gvec(), ctx_.phase_factors_t(),
282 [&ri](
int iat,
double g) { return ri.value(iat, g); });
289 if (env::print_checksum()) {
290 auto z = sddk::mdarray<std::complex<double>, 1>(&v[0], ctx_.
gvec().count()).checksum();
292 print_checksum(
"rho_pw", z, ctx_.
out());
296 std::copy(v.begin(), v.end(), &
rho().rg().f_pw_local(0));
298 rho().rg().fft_transform(1);
300 if (env::print_checksum()) {
301 auto cs =
rho().rg().checksum_rg();
302 print_checksum(
"rho_rg", cs, ctx_.
out());
306 for (
int ir = 0; ir < ctx_.spfft<
double>().local_slice_size(); ir++) {
307 rho().rg().value(ir) = std::max(0.0,
rho().rg().
value(ir));
311 for (
int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
312 int nmtp = ctx_.unit_cell().atom(ia).num_mt_points();
314 for (
int ir = 0; ir < nmtp; ir++) {
315 double x = ctx_.unit_cell().atom(ia).radial_grid(ir);
320 int lmax = ctx_.lmax_rho();
325 std::vector<std::complex<double>> zil(
lmax + 1);
326 for (
int l = 0; l <=
lmax; l++) {
327 zil[l] = std::pow(std::complex<double>(0, 1), l);
335 auto flm =
sum_fg_fl_yg(ctx_,
lmax, v.at(sddk::memory_t::host), sbessel_mt, gvec_ylm);
339 for (
int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
340 double R = ctx_.unit_cell().atom(ia).mt_radius();
346 for (
int iat = 0; iat < ctx_.unit_cell().num_atom_types(); iat++) {
347 sddk::mdarray<double, 2> rRl(ctx_.unit_cell().max_num_mt_points(),
lmax + 1);
348 double R = ctx_.unit_cell().atom_type(iat).mt_radius();
349 int nmtp = ctx_.unit_cell().atom_type(iat).num_mt_points();
351 #pragma omp parallel for default(shared)
352 for (
int l = 0; l <=
lmax; l++) {
353 for (
int ir = 0; ir < nmtp; ir++) {
354 rRl(ir, l) = std::pow(ctx_.unit_cell().atom_type(iat).radial_grid(ir) / R, 2);
357 #pragma omp parallel for default(shared)
360 std::vector<double> glm(
lmmax);
364 for (
int ir = 0; ir < nmtp; ir++) {
365 rho().mt()[ia](
lm, ir) += glm[
lm] * rRl(ir, l);
397 auto len = v.length();
402 for (
int ir = 0; ir < nmtp; ir++) {
404 rho_s(ir) = this->
rho().mt()[ia](0, ir) *
y00 *
405 (1 - 3 * std::pow(x / R, 2) + 2 * std::pow(x / R, 3));
409 double q =
fourpi * rho_s.interpolate().integrate(2);
414 for (
int x : {0, 1, 2}) {
421 for (
int ir = 0; ir < nmtp; ir++) {
422 mag(0).mt()[ia](0, ir) = rho_s(ir) * v[2] / q /
y00;
425 for (
int ir = 0; ir < nmtp; ir++) {
426 mag(1).mt()[ia](0, ir) = rho_s(ir) * v[0] / q /
y00;
427 mag(2).mt()[ia](0, ir) = rho_s(ir) * v[1] / q /
y00;
441 auto& dm = density_matrix(ia);
445 auto& atom_type = atom.
type();
449 auto& occupations = atom_type.paw_wf_occ();
452 auto magn = atom.vector_field();
454 for (
int xi = 0; xi < nbf; xi++) {
455 auto& basis_func_index_dsc = atom_type.indexb()[xi];
457 int rad_func_index = basis_func_index_dsc.idxrf;
459 double occ = occupations[rad_func_index];
461 int l = basis_func_index_dsc.am.l();
465 dm(xi, xi, 0) = occ / double(2 * l + 1);
471 double nm = (std::abs(magn[2]) < 1.0) ? magn[2] : std::copysign(1, magn[2]);
472 dm(xi, xi, 0) = 0.5 * (1.0 +
nm) * occ /
double(2 * l + 1);
473 dm(xi, xi, 1) = 0.5 * (1.0 -
nm) * occ /
double(2 * l + 1);
484 auto ia_paw = ctx_.unit_cell().spl_num_paw_atoms(ialoc__);
485 auto ia = ctx_.unit_cell().paw_atom_index(ia_paw);
487 auto& atom_type = ctx_.unit_cell().atom(ia).type();
489 auto lmax = atom_type.indexr().lmax();
499 auto& grid = atom_type.radial_grid();
501 auto& paw_ae_wfs = atom_type.ae_paw_wfs_array();
502 auto& paw_ps_wfs = atom_type.ps_paw_wfs_array();
506 for (
int imagn = 0; imagn < ctx_.
num_mag_dims() + 1; imagn++) {
511 for (
int xi2 = 0; xi2 < atom_type.indexb().size(); xi2++) {
512 int lm2 = atom_type.indexb(xi2).lm;
513 int irb2 = atom_type.indexb(xi2).idxrf;
515 for (
int xi1 = 0; xi1 <= xi2; xi1++) {
516 int lm1 = atom_type.indexb(xi1).lm;
517 int irb1 = atom_type.indexb(xi1).idxrf;
520 int num_non_zero_gc = GC.
num_gaunt(lm1, lm2);
522 double diag_coef = (xi1 == xi2) ? 1.0 : 2.0;
527 for (
int inz = 0; inz < num_non_zero_gc; inz++) {
528 auto& lm3coef = GC.
gaunt(lm1, lm2, inz);
531 for (
int irad = 0; irad < grid.num_points(); irad++) {
534 double inv_r2 = diag_coef / (grid[irad] * grid[irad]);
538 ae_dens(lm3coef.lm3, irad) +=
539 dm(idx, imagn) * inv_r2 * lm3coef.coef * paw_ae_wfs(irad, irb1) * paw_ae_wfs(irad, irb2);
540 ps_dens(lm3coef.lm3, irad) +=
541 dm(idx, imagn) * inv_r2 * lm3coef.coef *
542 (paw_ps_wfs(irad, irb1) * paw_ps_wfs(irad, irb2) +
543 atom_type.q_radial_function(irb1, irb2,
l_by_lm[lm3coef.lm3])(irad));
558 PROFILE(
"sirius::Density::generate_paw_density");
560 #pragma omp parallel for
573 fft__.backward(inp_wf__, fft__.processing_unit());
576 auto data_ptr = fft__.space_domain_data(fft__.processing_unit());
578 switch (fft__.processing_unit()) {
579 case SPFFT_PU_HOST: {
581 #pragma omp parallel for
582 for (
int ir = 0; ir < nr__; ir++) {
583 density_rg__(ir, ispn__) += w__ * std::pow(data_ptr[ir], 2);
586 auto data =
reinterpret_cast<std::complex<T>*
>(data_ptr);
587 #pragma omp parallel for
588 for (
int ir = 0; ir < nr__; ir++) {
590 density_rg__(ir, ispn__) += w__ * (std::pow(z.real(), 2) + std::pow(z.imag(), 2));
596#if defined(SIRIUS_GPU)
598 update_density_rg_1_real_gpu(nr__, data_ptr, w__, density_rg__.at(sddk::memory_t::device, 0, ispn__));
600 auto data =
reinterpret_cast<std::complex<T>*
>(data_ptr);
601 update_density_rg_1_complex_gpu(nr__, data, w__, density_rg__.at(sddk::memory_t::device, 0, ispn__));
616 auto data_ptr = fft__.space_domain_data(fft__.processing_unit());
619 fft__.backward(inp_wf_up__, fft__.processing_unit());
622 switch (fft__.processing_unit()) {
623 case SPFFT_PU_HOST: {
624 auto inp =
reinterpret_cast<std::complex<T>*
>(data_ptr);
625 std::copy(inp, inp + nr__, psi_r_up__.at(sddk::memory_t::host));
629 acc::copy(psi_r_up__.at(sddk::memory_t::device),
reinterpret_cast<std::complex<T>*
>(data_ptr), nr__);
635 fft__.backward(inp_wf_dn__, fft__.processing_unit());
638 auto psi_r_dn =
reinterpret_cast<std::complex<T>*
>(data_ptr);
639 auto& psi_r_up = psi_r_up__;
641 switch (fft__.processing_unit()) {
642 case SPFFT_PU_HOST: {
643 #pragma omp parallel for
644 for (
int ir = 0; ir < nr__; ir++) {
645 auto r0 = (std::pow(psi_r_up[ir].real(), 2) + std::pow(psi_r_up[ir].imag(), 2)) * w__;
646 auto r1 = (std::pow(psi_r_dn[ir].real(), 2) + std::pow(psi_r_dn[ir].imag(), 2)) * w__;
648 auto z2 = psi_r_up[ir] *
std::conj(psi_r_dn[ir]) * std::complex<T>(w__, 0);
650 density_rg__(ir, 0) += r0;
651 density_rg__(ir, 1) += r1;
652 density_rg__(ir, 2) += 2.0 * std::real(z2);
653 density_rg__(ir, 3) -= 2.0 * std::imag(z2);
660 update_density_rg_1_complex_gpu(nr__, psi_r_up.at(sddk::memory_t::device), w__,
661 density_rg__.at(sddk::memory_t::device, 0, 0));
663 update_density_rg_1_complex_gpu(nr__, psi_r_dn, w__, density_rg__.at(sddk::memory_t::device, 0, 1));
665 update_density_rg_2_gpu(nr__, psi_r_up.at(sddk::memory_t::device), psi_r_dn, w__,
666 density_rg__.at(sddk::memory_t::device, 0, 2),
667 density_rg__.at(sddk::memory_t::device, 0, 3));
678 PROFILE(
"sirius::Density::add_k_point_contribution_rg");
682 auto& fft = ctx_.spfft_coarse<T>();
685 int nr = fft.local_slice_size();
691 if (fft.processing_unit() == SPFFT_PU_GPU) {
692 density_rg.
allocate(get_memory_pool(sddk::memory_t::device)).zero(sddk::memory_t::device);
698 for (
int ispn = 0; ispn < ctx_.
num_spins(); ispn++) {
700 auto wf_mem = wf_fft__[ispn].on_device() ? sddk::memory_t::device : sddk::memory_t::host;
702 int nbnd = wf_fft__[ispn].num_wf_local();
703 for (
int i = 0; i < nbnd; i++) {
704 auto j = wf_fft__[ispn].spl_num_wf().global_index(i);
707 auto inp_wf = wf_fft__[ispn].pw_coeffs_spfft(wf_mem,
wf::band_index(i));
715 if (fft.processing_unit() == SPFFT_PU_GPU) {
716 psi_r_up.
allocate(get_memory_pool(sddk::memory_t::device));
719 RTE_ASSERT(wf_fft__[0].num_wf_local() == wf_fft__[1].num_wf_local());
721 int nbnd = wf_fft__[0].num_wf_local();
722 for (
int i = 0; i < nbnd; i++) {
723 auto j = wf_fft__[0].spl_num_wf().global_index(i);
726 auto wf_mem_up = wf_fft__[0].on_device() ? sddk::memory_t::device : sddk::memory_t::host;
727 auto wf_mem_dn = wf_fft__[1].on_device() ? sddk::memory_t::device : sddk::memory_t::host;
730 auto inp_wf_up = wf_fft__[0].pw_coeffs_spfft(wf_mem_up,
wf::band_index(i));
731 auto inp_wf_dn = wf_fft__[1].pw_coeffs_spfft(wf_mem_dn,
wf::band_index(i));
738 if (fft.processing_unit() == SPFFT_PU_GPU) {
739 density_rg.
copy_to(sddk::memory_t::host);
745 #pragma omp parallel for
746 for (
int ir = 0; ir < nr; ir++) {
752 #pragma omp parallel for
753 for (
int ir = 0; ir < nr; ir++) {
754 rho_mag_coarse_[0]->value(ir) += (density_rg(ir, 0) + density_rg(ir, 1));
755 rho_mag_coarse_[1]->value(ir) += (density_rg(ir, 0) - density_rg(ir, 1));
760 #pragma omp parallel for
761 for (
int ir = 0; ir < nr; ir++) {
773 PROFILE(
"sirius::add_k_point_contribution_dm_fplapw");
775 auto& uc = ctx__.unit_cell();
785 for (
auto it : kp__.spinor_wave_functions().spl_num_atoms()) {
787 int mt_basis_size = uc.atom(ia).type().mt_basis_size();
789 for (
int ispn = 0; ispn < ctx__.
num_spins(); ispn++) {
791 for (
int xi = 0; xi < mt_basis_size; xi++) {
792 auto z = kp__.spinor_wave_functions().mt_coeffs(xi, it.li,
wf::spin_index(ispn),
795 wf2(xi, j, ispn) =
static_cast<std::complex<double>
>(z) * kp__.
band_occupancy(j, ispn) *
802 for (
int ispn = 0; ispn < ctx__.
num_spins(); ispn++) {
804 kp__.
num_occupied_bands(ispn), &one, &wf1(0, 0, ispn), wf1.ld(), &wf2(0, 0, ispn), wf2.ld(), &one,
805 density_matrix__[ia].at(sddk::memory_t::host, 0, 0, ispn), density_matrix__[ia].ld());
810 &one, &wf1(0, 0, 0), wf1.ld(), &wf2(0, 0, 1), wf2.ld(), &one,
811 density_matrix__[ia].at(sddk::memory_t::host, 0, 0, 2), density_matrix__[ia].ld());
817template <
typename T,
typename F>
819add_k_point_contribution_dm_pwpp_collinear(Simulation_context& ctx__, K_point<T>& kp__,
820 beta_projectors_coeffs_t<T>& bp_coeffs__, density_matrix_t& density_matrix__)
823 int nbeta = bp_coeffs__.beta_chunk_.num_beta_;
824 auto mt = ctx__.processing_unit_memory_t();
826 for (
int ispn = 0; ispn < ctx__.num_spins(); ispn++) {
828 int nbnd = kp__.num_occupied_bands(ispn);
831 inner_prod_beta<F>(ctx__.spla_context(), mt, ctx__.host_memory_t(), sddk::is_device_memory(mt),
832 bp_coeffs__, kp__.spinor_wave_functions(), wf::spin_index(ispn), wf::band_range(0, nbnd));
835 splindex_block<> spl_nbnd(nbnd,
n_blocks(kp__.comm().size()),
block_id(kp__.comm().rank()));
837 int nbnd_loc = spl_nbnd.local_size();
842 sddk::mdarray<std::complex<double>, 2> bp1(nbeta, nbnd_loc);
843 sddk::mdarray<std::complex<double>, 2> bp2(nbeta, nbnd_loc);
845 for (
int ia = 0; ia < bp_coeffs__.beta_chunk_.num_atoms_; ia++) {
853 for (
int i = 0; i < nbnd_loc; i++) {
855 auto j = spl_nbnd.global_index(i);
857 for (
int xi = 0; xi < nbf; xi++) {
858 bp1(xi, i) = beta_psi(offs + xi, j);
859 bp2(xi, i) =
std::conj(bp1(xi, i)) * kp__.weight() * kp__.band_occupancy(j, ispn);
864 .gemm(
'N',
'T', nbf, nbf, nbnd_loc, &la::constant<std::complex<double>>::one(),
865 &bp1(0, 0), bp1.ld(), &bp2(0, 0), bp2.ld(),
866 &la::constant<std::complex<double>>::one(), &density_matrix__[ja](0, 0, ispn),
867 density_matrix__[ja].ld());
874template <
typename T,
typename F>
876add_k_point_contribution_dm_pwpp_noncollinear(Simulation_context& ctx__, K_point<T>& kp__,
877 beta_projectors_coeffs_t<T>& bp_coeffs__, density_matrix_t& density_matrix__)
880 int nbeta = bp_coeffs__.beta_chunk_.num_beta_;
883 int nbnd = kp__.num_occupied_bands();
885 splindex_block<> spl_nbnd(nbnd,
n_blocks(kp__.comm().size()),
block_id(kp__.comm().rank()));
886 int nbnd_loc = spl_nbnd.local_size();
889 sddk::mdarray<std::complex<double>, 3> bp1(nbeta, nbnd_loc, ctx__.num_spins());
890 sddk::mdarray<std::complex<double>, 3> bp2(nbeta, nbnd_loc, ctx__.num_spins());
892 auto& uc = ctx__.unit_cell();
894 auto mt = ctx__.processing_unit_memory_t();
895 for (
int ispn = 0; ispn < ctx__.num_spins(); ispn++) {
898 inner_prod_beta<F>(ctx__.spla_context(), mt, ctx__.host_memory_t(), sddk::is_device_memory(mt),
899 bp_coeffs__, kp__.spinor_wave_functions(), wf::spin_index(ispn), wf::band_range(0, nbnd));
901 #pragma omp parallel for schedule(static)
902 for (
int i = 0; i < nbnd_loc; i++) {
903 auto j = spl_nbnd.global_index(i);
905 for (
int m = 0; m < nbeta; m++) {
906 bp1(m, i, ispn) = beta_psi(m, j);
907 bp2(m, i, ispn) =
std::conj(beta_psi(m, j));
908 bp2(m, i, ispn) *= kp__.weight() * kp__.band_occupancy(j, 0);
912 for (
int ia = 0; ia < bp_coeffs__.beta_chunk_.num_atoms_; ia++) {
919 if (uc.atom(ja).type().spin_orbit_coupling()) {
920 sddk::mdarray<std::complex<double>, 3> bp3(nbf, nbnd_loc, 2);
930 for (
int xi1 = 0; xi1 < nbf; xi1++) {
931 for (
int i = 0; i < nbnd_loc; i++) {
932 for (
int xi1p = 0; xi1p < nbf; xi1p++) {
933 if (uc.atom(ja).type().compare_index_beta_functions(xi1, xi1p)) {
935 bp1(offs + xi1p, i, 0) *
936 uc.atom(ja).type().f_coefficients(xi1, xi1p, 0, 0) +
937 bp1(offs + xi1p, i, 1) *
938 uc.atom(ja).type().f_coefficients(xi1, xi1p, 0, 1);
940 bp1(offs + xi1p, i, 0) *
941 uc.atom(ja).type().f_coefficients(xi1, xi1p, 1, 0) +
942 bp1(offs + xi1p, i, 1) *
943 uc.atom(ja).type().f_coefficients(xi1, xi1p, 1, 1);
949 for (
int xi1 = 0; xi1 < nbf; xi1++) {
950 for (
int i = 0; i < nbnd_loc; i++) {
951 bp1(offs + xi1, i, 0) = bp3(xi1, i, 0);
952 bp1(offs + xi1, i, 1) = bp3(xi1, i, 1);
958 for (
int xi1 = 0; xi1 < nbf; xi1++) {
959 for (
int i = 0; i < nbnd_loc; i++) {
960 for (
int xi1p = 0; xi1p < nbf; xi1p++) {
961 if (uc.atom(ja).type().compare_index_beta_functions(xi1, xi1p)) {
963 bp2(offs + xi1p, i, 0) *
964 uc.atom(ja).type().f_coefficients(xi1p, xi1, 0, 0) +
965 bp2(offs + xi1p, i, 1) *
966 uc.atom(ja).type().f_coefficients(xi1p, xi1, 1, 0);
968 bp2(offs + xi1p, i, 0) *
969 uc.atom(ja).type().f_coefficients(xi1p, xi1, 0, 1) +
970 bp2(offs + xi1p, i, 1) *
971 uc.atom(ja).type().f_coefficients(xi1p, xi1, 1, 1);
977 for (
int xi1 = 0; xi1 < nbf; xi1++) {
978 for (
int i = 0; i < nbnd_loc; i++) {
979 bp2(offs + xi1, i, 0) = bp3(xi1, i, 0);
980 bp2(offs + xi1, i, 1) = bp3(xi1, i, 1);
987 #pragma omp parallel for
988 for (
int ia = 0; ia < bp_coeffs__.beta_chunk_.num_atoms_; ia++) {
993 for (
int ispn = 0; ispn < 2; ispn++) {
995 .gemm(
'N',
'T', nbf, nbf, nbnd_loc, &la::constant<std::complex<double>>::one(),
996 &bp1(offs, 0, ispn), bp1.ld(), &bp2(offs, 0, ispn), bp2.ld(),
997 &la::constant<std::complex<double>>::one(), &density_matrix__[ja](0, 0, ispn),
998 density_matrix__[ja].ld());
1002 .gemm(
'N',
'T', nbf, nbf, nbnd_loc, &la::constant<std::complex<double>>::one(), &bp1(offs, 0, 0),
1003 bp1.ld(), &bp2(offs, 0, 1), bp2.ld(), &la::constant<std::complex<double>>::one(),
1004 &density_matrix__[ja](0, 0, 2), density_matrix__[ja].ld());
1009template <
typename T,
typename F>
1011add_k_point_contribution_dm_pwpp(Simulation_context& ctx__, K_point<T>& kp__, density_matrix_t& density_matrix__)
1013 PROFILE(
"sirius::add_k_point_contribution_dm_pwpp");
1015 if (!ctx__.unit_cell().max_mt_basis_size()) {
1019 auto bp_gen = kp__.beta_projectors().make_generator();
1020 auto bp_coeffs = bp_gen.prepare();
1022 for (
int ichunk = 0; ichunk < kp__.beta_projectors().num_chunks(); ichunk++) {
1024 bp_gen.generate(bp_coeffs, ichunk);
1026 if (ctx__.num_mag_dims() != 3) {
1027 add_k_point_contribution_dm_pwpp_collinear<T, F>(ctx__, kp__, bp_coeffs, density_matrix__);
1029 add_k_point_contribution_dm_pwpp_noncollinear<T, F>(ctx__, kp__, bp_coeffs, density_matrix__);
1037 double nel = std::get<0>(
rho().integrate());
1041 for (
int ir = 0; ir < ctx_.spfft<
double>().local_slice_size(); ir++) {
1042 rho().rg().value(ir) *= scale;
1044 if (ctx_.full_potential()) {
1046 for (
int ir = 0; ir <
unit_cell_.
atom(ia).num_mt_points(); ir++) {
1047 for (
int lm = 0;
lm < ctx_.lmmax_rho();
lm++) {
1048 rho().mt()[ia](
lm, ir) *= scale;
1060 if (ctx_.full_potential()) {
1061 nel = std::get<0>(
rho().integrate());
1068 std::stringstream s;
1069 s <<
"wrong number of electrons" << std::endl
1070 <<
" obtained value : " << nel << std::endl
1073 if (ctx_.full_potential()) {
1076 s << std::endl <<
" atom class : " << ic <<
", core leakage : " <<
core_leakage(ic);
1086template <
typename T>
1090 PROFILE(
"sirius::Density::generate");
1092 generate_valence<T>(ks__);
1094 if (ctx_.full_potential()) {
1100 for (
int ir = 0; ir <
unit_cell_.
atom(it.i).num_mt_points(); ir++) {
1101 rho().mt()[it.i](0, ir) +=
1108 this->component(iv).mt().sync(ctx_.unit_cell().spl_num_atoms());
1112 symmetrize_field4d(*
this);
1114 std::unique_ptr<density_matrix_t> dm_ref;
1115 std::unique_ptr<Occupation_matrix> om_ref;
1118 if (ctx_.cfg().control().verification() >= 1 && ctx_.cfg().parameters().use_ibz() ==
false) {
1122 if (ctx_.cfg().control().verification() >= 1 && ctx_.cfg().parameters().use_ibz() ==
false &&
1124 om_ref = std::make_unique<Occupation_matrix>(ctx_);
1139 if (ctx_.cfg().control().verification() >= 1 && ctx_.cfg().parameters().use_ibz() ==
false) {
1142 for (
size_t i = 0; i < (*density_matrix_)[ia].size(); i++) {
1143 diff = std::max(diff, std::abs((*dm_ref)[ia][i] - (*
density_matrix_)[ia][i]));
1146 std::string status = (diff > 1e-8) ?
"Fail" :
"OK";
1147 if (ctx_.verbosity() >= 1) {
1148 RTE_OUT(ctx_.
out()) <<
"error of density matrix symmetrization: " << diff <<
" "
1149 << status << std::endl;
1153 if (ctx_.cfg().control().verification() >= 1 && ctx_.cfg().parameters().use_ibz() ==
false &&
1156 for (
int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
1157 if (ctx_.unit_cell().atom(ia).type().hubbard_correction()) {
1159 diff1 = std::max(diff1, std::abs(om_ref->local(ia)[i] -
occupation_matrix_->local(ia)[i]));
1163 std::string status = (diff1 > 1e-8) ?
"Fail" :
"OK";
1164 if (ctx_.verbosity() >= 1) {
1165 RTE_OUT(ctx_.
out()) <<
"error of LDA+U local occupation matrix symmetrization: " << diff1
1166 <<
" " << status << std::endl;
1169 om_ref->update_nonlocal();
1174 diff2 = std::max(diff2, std::abs(om_ref->nonlocal(i)[j] -
occupation_matrix_->nonlocal(i)[j]));
1177 status = (diff2 > 1e-8) ?
"Fail" :
"OK";
1178 if (ctx_.verbosity() >= 1) {
1179 RTE_OUT(ctx_.
out()) <<
"error of LDA+U nonlocal occupation matrix symmetrization: " << diff2
1180 <<
" " << status << std::endl;
1196 if (transform_to_rg__) {
1197 this->fft_transform(1);
1201template void Density::generate<double>(
K_point_set const& ks__,
bool symmetrize__,
bool add_core__,
1202 bool transform_to_rg__);
1203#if defined(SIRIUS_USE_FP32)
1204template void Density::generate<float>(
K_point_set const& ks__,
bool symmetrize__,
bool add_core__,
1205 bool transform_to_rg__);
1211 PROFILE(
"sirius::Density::augment");
1221 #pragma omp parallel for schedule(static)
1222 for (
int igloc = 0; igloc < ctx_.
gvec().count(); igloc++) {
1223 this->component(iv).rg().f_pw_local(igloc) += rho_aug(igloc, iv);
1228template <
typename T>
1232 PROFILE(
"sirius::Density::generate_valence");
1238 wt += ks__.get<T>(ik)->weight();
1239 for (
int ispn = 0; ispn < ctx_.
num_spinors(); ispn++) {
1240 for (
int j = 0; j < ctx_.
num_bands(); j++) {
1241 occ_val += ks__.get<T>(ik)->weight() * ks__.get<T>(ik)->band_occupancy(j, ispn);
1246 if (std::abs(wt - 1.0) > 1e-12) {
1247 std::stringstream s;
1248 s <<
"K_point weights don't sum to one" << std::endl <<
" obtained sum: " << wt;
1254 std::stringstream s;
1255 s <<
"wrong band occupancies" << std::endl
1256 <<
" computed : " << occ_val << std::endl
1275 auto mem = ctx_.processing_unit() == sddk::device_t::CPU ? sddk::memory_t::host : sddk::memory_t::device;
1278 for (
auto it : ks__.spl_num_kpoints()) {
1279 auto kp = ks__.get<T>(it.i);
1281 std::array<wf::Wave_functions_fft<T>, 2> wf_fft;
1283 std::vector<wf::device_memory_guard> mg;
1285 mg.emplace_back(kp->spinor_wave_functions().memory_guard(mem, wf::copy_to::device));
1286 if (ctx_.hubbard_correction()) {
1287 mg.emplace_back(kp->hubbard_wave_functions_S().memory_guard(mem, wf::copy_to::device));
1290 for (
int ispn = 0; ispn < ctx_.
num_spins(); ispn++) {
1291 int nbnd = kp->num_occupied_bands(ispn);
1297 if (ctx_.full_potential()) {
1300 if (ctx_.
gamma_point() && (ctx_.so_correction() ==
false)) {
1303 add_k_point_contribution_dm_pwpp<T, std::complex<T>>(ctx_, *kp, *
density_matrix_);
1314 for (
int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
1315 ctx_.
comm().
allreduce(density_matrix(ia).at(sddk::memory_t::host),
static_cast<int>(density_matrix(ia).size()));
1322 auto& comm = ctx_.gvec_coarse_fft_sptr()->comm_ortho_fft();
1324 auto ptr = (ctx_.spfft_coarse<
double>().local_slice_size() == 0) ?
nullptr : &
rho_mag_coarse_[j]->value(0);
1327 comm.allreduce(ptr, ctx_.spfft_coarse<
double>().local_slice_size());
1329 if (env::print_checksum()) {
1332 print_checksum(
"rho_mag_coarse_rg", cs, ctx_.
out());
1337 for (
int igloc = 0; igloc < ctx_.gvec_coarse().count(); igloc++) {
1338 component(j).rg().f_pw_local(ctx_.
gvec().gvec_base_mapping(igloc)) =
rho_mag_coarse_[j]->f_pw_local(igloc);
1342 if (!ctx_.full_potential()) {
1346 if (ctx_.
gvec().comm().rank() == 0) {
1347 rho().rg().f_pw_local(0) += ctx_.cfg().parameters().extra_charge() / ctx_.unit_cell().omega();
1350 if (env::print_hash()) {
1352 print_hash(
"rho", h, ctx_.
out());
1358 std::stringstream s;
1359 s <<
"wrong unsymmetrized density" << std::endl
1360 <<
" obtained value : " << std::scientific << nel << std::endl
1368 if (ctx_.full_potential()) {
1376 PROFILE(
"sirius::Density::generate_rho_aug");
1379 int gvec_count = ctx_.
gvec().count();
1380 auto spl_ngv_loc =
split_in_blocks(gvec_count, ctx_.cfg().control().gvec_chunk_size());
1382 auto& mph = get_memory_pool(sddk::memory_t::host);
1387 switch (ctx_.processing_unit()) {
1388 case sddk::device_t::CPU: {
1389 rho_aug.
zero(sddk::memory_t::host);
1392 case sddk::device_t::GPU: {
1393 mpd = &get_memory_pool(sddk::memory_t::device);
1394 rho_aug.
allocate(*mpd).zero(sddk::memory_t::device);
1403 if (!atom_type.augment() || atom_type.num_atoms() == 0) {
1410 int nqlm = nbf * (nbf + 1) / 2;
1415 if (env::print_checksum()) {
1416 auto cs = dm.checksum();
1417 print_checksum(
"density_matrix_aux", cs, ctx_.
out());
1420 int ndm_pw = (ctx_.processing_unit() == sddk::device_t::CPU) ? 1 : ctx_.
num_mag_dims() + 1;
1425 print_memory_usage(ctx_.
out(), FILE_LINE);
1427 switch (ctx_.processing_unit()) {
1428 case sddk::device_t::CPU: {
1431 case sddk::device_t::GPU: {
1434 dm.allocate(*mpd).copy_to(sddk::memory_t::device);
1439 print_memory_usage(ctx_.
out(), FILE_LINE);
1446 for (
auto ng : spl_ngv_loc) {
1449 switch (ctx_.processing_unit()) {
1450 case sddk::device_t::CPU: {
1452 #pragma omp parallel for
1453 for (
int g = 0; g < ng; g++) {
1454 int ig = ctx_.
gvec().offset() + g_begin + g;
1455 for (
int i = 0; i < atom_type.num_atoms(); i++) {
1456 int ia = atom_type.atom_id(i);
1458 phase_factors(i, 2 * g) = z.real();
1459 phase_factors(i, 2 * g + 1) = z.imag();
1463 PROFILE_START(
"sirius::Density::generate_rho_aug|gemm");
1466 dm.at(sddk::memory_t::host, 0, 0, iv), dm.ld(),
1467 phase_factors.at(sddk::memory_t::host), phase_factors.
ld(),
1469 dm_pw.at(sddk::memory_t::host, 0, 0, 0), dm_pw.
ld());
1470 PROFILE_STOP(
"sirius::Density::generate_rho_aug|gemm");
1471 PROFILE_START(
"sirius::Density::generate_rho_aug|sum");
1472 #pragma omp parallel for
1473 for (
int g = 0; g < ng; g++) {
1474 int igloc = g_begin + g;
1475 std::complex<double> zsum(0, 0);
1477 for (
int i = 0; i < nqlm; i++) {
1480 std::complex<double> z2(dm_pw(i, 2 * g, 0),
1481 dm_pw(i, 2 * g + 1, 0));
1486 rho_aug(igloc, iv) += zsum;
1488 PROFILE_STOP(
"sirius::Density::generate_rho_aug|sum");
1492 case sddk::device_t::GPU: {
1493#if defined(SIRIUS_GPU)
1495 ctx_.
augmentation_op(iat).q_pw().at(sddk::memory_t::host, 0, 2 * g_begin), 2 * ng * nqlm);
1498 generate_dm_pw_gpu(atom_type.num_atoms(), ng, nbf,
1499 ctx_.unit_cell().atom_coord(iat).at(sddk::memory_t::device),
1500 ctx_.gvec_coord().at(sddk::memory_t::device, g_begin, 0),
1501 ctx_.gvec_coord().at(sddk::memory_t::device, g_begin, 1),
1502 ctx_.gvec_coord().at(sddk::memory_t::device, g_begin, 2),
1503 phase_factors.at(sddk::memory_t::device), dm.at(sddk::memory_t::device, 0, 0, iv),
1504 dm_pw.at(sddk::memory_t::device, 0, 0, iv), 1 + iv);
1505 sum_q_pw_dm_pw_gpu(ng, nbf, qpw.at(sddk::memory_t::device), qpw.ld(),
1506 dm_pw.at(sddk::memory_t::device, 0, 0, iv), dm_pw.
ld(),
1508 rho_aug.at(sddk::memory_t::device, g_begin, iv), 1 + iv);
1522 if (ctx_.processing_unit() == sddk::device_t::GPU) {
1523 rho_aug.
copy_to(sddk::memory_t::host);
1526 if (env::print_checksum()) {
1529 print_checksum(
"rho_aug", cs, ctx_.
out());
1532 if (env::print_hash()) {
1533 auto h = rho_aug.
hash();
1534 print_hash(
"rho_aug", h, ctx_.
out());
1540template <
int num_mag_dims>
1544 mt_density_matrix__.
zero();
1546 #pragma omp parallel for default(shared)
1548 int l2 = atom_type__.
indexr(idxrf2).am.l();
1549 for (
int idxrf1 = 0; idxrf1 <= idxrf2; idxrf1++) {
1550 int offs = idxrf2 * (idxrf2 + 1) / 2 + idxrf1;
1551 int l1 = atom_type__.
indexr(idxrf1).am.l();
1553 int xi2 = atom_type__.indexb().index_of(
rf_index(idxrf2));
1554 for (
int lm2 =
sf::lm(l2, -l2); lm2 <=
sf::lm(l2, l2); lm2++, xi2++) {
1555 int xi1 = atom_type__.indexb().index_of(
rf_index(idxrf1));
1556 for (
int lm1 =
sf::lm(l1, -l1); lm1 <=
sf::lm(l1, l1); lm1++, xi1++) {
1557 for (
int k = 0; k < atom_type__.gaunt_coefs().num_gaunt(lm1, lm2); k++) {
1558 int lm3 = atom_type__.gaunt_coefs().gaunt(lm1, lm2, k).lm3;
1559 auto gc = atom_type__.gaunt_coefs().gaunt(lm1, lm2, k).coef;
1562 mt_density_matrix__(lm3, offs, 2) += 2.0 * std::real(zdens__(xi1, xi2, 2) * gc);
1563 mt_density_matrix__(lm3, offs, 3) -= 2.0 * std::imag(zdens__(xi1, xi2, 2) * gc);
1566 mt_density_matrix__(lm3, offs, 1) += std::real(zdens__(xi1, xi2, 1) * gc);
1569 mt_density_matrix__(lm3, offs, 0) += std::real(zdens__(xi1, xi2, 0) * gc);
1582 PROFILE(
"sirius::Density::generate_valence_mt");
1585 if (ctx_.hubbard_correction()) {
1586 RTE_THROW(
"LDA+U in LAPW is not implemented");
1654 int num_rf_pairs = atom_type.mt_radial_basis_size() * (atom_type.mt_radial_basis_size() + 1) / 2;
1656 PROFILE_START(
"sirius::Density::generate|sum_zdens");
1659 reduce_density_matrix<3>(atom_type, density_matrix(it.i), mt_density_matrix);
1663 reduce_density_matrix<1>(atom_type, density_matrix(it.i), mt_density_matrix);
1667 reduce_density_matrix<0>(atom_type, density_matrix(it.i), mt_density_matrix);
1671 PROFILE_STOP(
"sirius::Density::generate|sum_zdens");
1673 PROFILE(
"sirius::Density::generate|expand_lm");
1675 for (
int idxrf2 = 0; idxrf2 < atom_type.mt_radial_basis_size(); idxrf2++) {
1676 int offs = idxrf2 * (idxrf2 + 1) / 2;
1677 for (
int idxrf1 = 0; idxrf1 <= idxrf2; idxrf1++) {
1679 int n = (idxrf1 == idxrf2) ? 1 : 2;
1680 for (
int ir = 0; ir <
unit_cell_.
atom(it.i).num_mt_points(); ir++) {
1689 &mt_density_matrix(0, 0, j), mt_density_matrix.
ld(), &rf_pairs(0, 0), rf_pairs.
ld(),
1693 auto sz = ctx_.lmmax_rho() * nmtp;
1697 std::copy(&dlm(0, 0, 2), &dlm(0, 0, 2) + sz, &mag(1).mt()[it.i](0, 0));
1699 std::copy(&dlm(0, 0, 3), &dlm(0, 0, 3) + sz, &mag(2).mt()[it.i](0, 0));
1702 for (
int ir = 0; ir < nmtp; ir++) {
1703 for (
int lm = 0;
lm < ctx_.lmmax_rho();
lm++) {
1704 rho().mt()[it.i](
lm, ir) = dlm(
lm, ir, 0) + dlm(
lm, ir, 1);
1705 mag(0).mt()[it.i](
lm, ir) = dlm(
lm, ir, 0) - dlm(
lm, ir, 1);
1711 std::copy(&dlm(0, 0, 0), &dlm(0, 0, 0) + sz, &
rho().mt()[it.i](0, 0));
1720 PROFILE(
"sirius::Density::compute_atomic_mag_mom");
1725 #pragma omp parallel for
1728 auto& atom_to_grid_map = ctx_.atoms_to_grid_idx_map(ia);
1730 for (
auto coord : atom_to_grid_map) {
1731 int ir = coord.first;
1733 mmom(j, ia) += mag(j).rg().value(ir);
1737 for (
int j : {0, 1, 2}) {
1745std::tuple<std::array<double, 3>, std::array<double, 3>, std::vector<std::array<double, 3>>>
1748 PROFILE(
"sirius::Density::get_magnetisation");
1750 std::array<double, 3> total_mag({0, 0, 0});
1751 std::vector<std::array<double, 3>> mt_mag(ctx_.unit_cell().num_atoms(), {0, 0, 0});
1752 std::array<double, 3> it_mag({0, 0, 0});
1754 std::vector<int> idx = (ctx_.
num_mag_dims() == 1) ? std::vector<int>({2}) : std::vector<int>({2, 0, 1});
1757 auto result = this->mag(j).integrate();
1758 total_mag[idx[j]] = std::get<0>(result);
1759 it_mag[idx[j]] = std::get<1>(result);
1760 if (ctx_.full_potential()) {
1761 auto v = std::get<2>(result);
1762 for (
int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
1763 mt_mag[ia][idx[j]] = v[ia];
1768 if (!ctx_.full_potential()) {
1771 for (
int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
1772 mt_mag[ia][idx[j]] = mmom(j, ia);
1776 return std::make_tuple(total_mag, it_mag, mt_mag);
1782 auto nbf = ctx_.unit_cell().atom(ia__).type().mt_basis_size();
1784 for (
int xi2 = 0; xi2 < nbf; xi2++) {
1785 for (
int xi1 = 0; xi1 <= xi2; xi1++) {
1789 dm(idx12, 2) = 2 * std::real(this->density_matrix(ia__)(xi2, xi1, 2));
1790 dm(idx12, 3) = -2 * std::imag(this->density_matrix(ia__)(xi2, xi1, 2));
1794 std::real(this->density_matrix(ia__)(xi2, xi1, 0) + this->density_matrix(ia__)(xi2, xi1, 1));
1796 std::real(this->density_matrix(ia__)(xi2, xi1, 0) - this->density_matrix(ia__)(xi2, xi1, 1));
1800 dm(idx12, 0) = this->density_matrix(ia__)(xi2, xi1, 0).real();
1809sddk::mdarray<double, 3>
1816 #pragma omp parallel for
1817 for (
int i = 0; i < atom_type__.
num_atoms(); i++) {
1818 int ia = atom_type__.
atom_id(i);
1821 for (
int k = 0; k < nbf * (nbf + 1) / 2; k++) {
1822 dm(k, i, j) = dm1(k, j);
1832 auto func_prop = mixer::periodic_function_property();
1834 auto density_prop = mixer::density_function_property();
1835 auto paw_prop = mixer::paw_density_function_property();
1836 auto hubbard_prop = mixer::hubbard_matrix_function_property();
1844 if (ctx_.full_potential()) {
1845 this->
mixer_->initialize_function<0>(func_prop, component(0), ctx_, [&](
int ia){
return lmax_t(ctx_.lmax_rho());});
1846 if (ctx_.num_mag_dims() > 0) {
1847 this->
mixer_->initialize_function<1>(func_prop, component(1), ctx_, [&](
int ia){
return lmax_t(ctx_.lmax_rho());});
1849 if (ctx_.num_mag_dims() > 1) {
1850 this->
mixer_->initialize_function<2>(func_prop, component(2), ctx_, [&](
int ia){
return lmax_t(ctx_.lmax_rho());});
1851 this->
mixer_->initialize_function<3>(func_prop, component(3), ctx_, [&](
int ia){
return lmax_t(ctx_.lmax_rho());});
1856 this->
mixer_->initialize_function<0>(func_prop1, component(0), ctx_);
1858 this->
mixer_->initialize_function<0>(func_prop, component(0), ctx_);
1861 this->
mixer_->initialize_function<1>(func_prop, component(1), ctx_);
1864 this->
mixer_->initialize_function<2>(func_prop, component(2), ctx_);
1865 this->
mixer_->initialize_function<3>(func_prop, component(3), ctx_);
1871 if (ctx_.unit_cell().num_paw_atoms()) {
1880Density::mixer_input()
1882 PROFILE(
"sirius::Density::mixer_input");
1884 mixer_->set_input<0>(component(0));
1886 mixer_->set_input<1>(component(1));
1889 mixer_->set_input<2>(component(2));
1890 mixer_->set_input<3>(component(3));
1893 mixer_->set_input<4>(*density_matrix_);
1895 if (ctx_.unit_cell().num_paw_atoms()) {
1896 mixer_->set_input<5>(*paw_density_);
1900 mixer_->set_input<6>(*occupation_matrix_);
1905Density::mixer_output()
1907 PROFILE(
"sirius::Density::mixer_output");
1909 mixer_->get_output<0>(component(0));
1911 mixer_->get_output<1>(component(1));
1914 mixer_->get_output<2>(component(2));
1915 mixer_->get_output<3>(component(3));
1918 mixer_->get_output<4>(*density_matrix_);
1920 if (ctx_.unit_cell().num_paw_atoms()) {
1921 mixer_->get_output<5>(*paw_density_);
1925 mixer_->get_output<6>(*occupation_matrix_);
1929 this->fft_transform(-1);
1935 PROFILE(
"sirius::Density::mix");
1938 double rms =
mixer_->mix(ctx_.cfg().settings().mixer_rms_min());
1944void Density::print_info(std::ostream& out__)
const
1946 auto result = this->
rho().integrate();
1948 auto total_charge = std::get<0>(result);
1949 auto it_charge = std::get<1>(result);
1950 auto mt_charge = std::get<2>(result);
1953 auto total_mag = std::get<0>(result_mag);
1954 auto it_mag = std::get<1>(result_mag);
1955 auto mt_mag = std::get<2>(result_mag);
1958 out__ <<
"[" << std::setw(9) << std::setprecision(5) << std::fixed << v__[0] <<
", " << std::setw(9)
1959 << std::setprecision(5) << std::fixed << v__[1] <<
", " << std::setw(9) << std::setprecision(5)
1960 << std::fixed << v__[2] <<
"]";
1963 out__ <<
"Charges and magnetic moments" << std::endl
1964 << hbar(80,
'-') << std::endl;
1965 if (ctx_.full_potential()) {
1966 double total_core_leakage{0.0};
1967 out__ <<
"atom charge core leakage";
1969 out__ <<
" moment |moment|";
1971 out__ << std::endl << hbar(80,
'-') << std::endl;
1976 out__ << std::setw(4) << ia << std::setw(12) << std::setprecision(6) << std::fixed << mt_charge[ia]
1977 << std::setw(16) << std::setprecision(6) << std::scientific <<
core_leakage;
1979 r3::vector<double> v(mt_mag[ia]);
1982 out__ << std::setw(12) << std::setprecision(6) << std::fixed << v.length();
1987 out__ <<
"total core leakage : " << std::setprecision(8) << std::scientific << total_core_leakage
1989 <<
"interstitial charge : " << std::setprecision(6) << std::fixed << it_charge << std::endl;
1991 r3::vector<double> v(it_mag);
1992 out__ <<
"interstitial moment : ";
1994 out__ <<
", magnitude : " << std::setprecision(6) << std::fixed << v.length() << std::endl;
1998 out__ <<
"atom moment |moment|" << std::endl
1999 << hbar(80,
'-') << std::endl;
2002 r3::vector<double> v(mt_mag[ia]);
2003 out__ << std::setw(4) << ia <<
" ";
2005 out__ << std::setw(12) << std::setprecision(6) << std::fixed << v.length() << std::endl;
2010 out__ <<
"total charge : " << std::setprecision(6) << std::fixed << total_charge << std::endl;
2013 r3::vector<double> v(total_mag);
2014 out__ <<
"total moment : ";
2016 out__ <<
", magnitude : " << std::setprecision(6) << std::fixed << v.length() << std::endl;
Contains declaration and implementation of sirius::Beta_projectors_base class.
int num_atoms() const
Return number of atoms belonging to the current symmetry class.
double radial_function(int ir, int idx) const
Get a value of the radial functions.
Defines the properties of atom type.
int mt_basis_size() const
Total number of muffin-tin basis functions (APW + LO).
int atom_id(int idx) const
Return atom ID (global index) by the index of atom within a given type.
int num_mt_points() const
Return number of muffin-tin radial grid points.
auto const & indexr() const
Return const reference to the index of radial functions.
void init_free_atom_density(bool smooth)
Initialize the free atom density (smooth or true).
double free_atom_density(const int idx) const
Get free atom density at i-th point of radial grid.
int mt_radial_basis_size() const
Total number of radial basis functions.
int num_atoms() const
Return number of atoms of a given type.
int type_id() const
Return atom type id.
Atom_symmetry_class & symmetry_class()
Return reference to corresponding atom symmetry class.
Atom_type const & type() const
Return const reference to corresponding atom type object.
auto vector_field() const
Return vector field.
sddk::mdarray< double, 2 > compute_atomic_mag_mom() const
Calculate approximate atomic magnetic moments in case of PP-PW.
std::unique_ptr< mixer::Mixer< Periodic_function< double >, Periodic_function< double >, Periodic_function< double >, Periodic_function< double >, density_matrix_t, PAW_density< double >, Hubbard_matrix > > mixer_
Density mixer.
double core_leakage() const
Find the total leakage of the core states out of the muffin-tins.
void generate_paw_density()
Generate and in lm components.
Unit_cell & unit_cell_
Alias to ctx_.unit_cell()
void mixer_init(config_t::mixer_t const &mixer_cfg__)
Initialize density mixer.
void update()
Update internal parameters after a change of lattice vectors or atomic coordinates.
std::unique_ptr< density_matrix_t > density_matrix_
Density matrix for all atoms.
void generate_core_charge_density()
Generate charge density of core states.
std::unique_ptr< PAW_density< double > > paw_density_
Local fraction of atoms with PAW correction.
sddk::mdarray< double, 3 > density_matrix_aux(Atom_type const &atom_type__) const
Return density matrix for all atoms of a given type in auxiliary form.
void augment()
Add augmentation charge Q(r).
bool check_num_electrons() const
Check total density for the correct number of electrons.
void reduce_density_matrix(Atom_type const &atom_type__, sddk::mdarray< std::complex< double >, 3 > const &zdens__, sddk::mdarray< double, 3 > &mt_density_matrix__)
Reduce complex density matrix over magnetic quantum numbers.
Density(Simulation_context &ctx__)
Constructor.
sddk::mdarray< std::complex< double >, 2 > generate_rho_aug() const
Generate augmentation charge density.
double mix()
Mix new density.
std::tuple< std::array< double, 3 >, std::array< double, 3 >, std::vector< std::array< double, 3 > > > get_magnetisation() const
Get total magnetization and also contributions from interstitial and muffin-tin parts.
auto const & rho() const
Return const reference to charge density (scalar functions).
void initial_density()
Generate initial charge density and magnetization.
void generate_valence_mt()
Generate valence density in the muffin-tins.
std::unique_ptr< Smooth_periodic_function< double > > rho_pseudo_core_
Pointer to pseudo core charge density.
std::unique_ptr< Occupation_matrix > occupation_matrix_
Occupation matrix of the LDA+U method.
void add_k_point_contribution_rg(K_point< T > *kp__, std::array< wf::Wave_functions_fft< T >, 2 > &wf_fft__)
Add k-point contribution to the density and magnetization defined on the regular FFT grid.
std::array< std::unique_ptr< Smooth_periodic_function< double > >, 4 > rho_mag_coarse_
Density and magnetization on the coarse FFT mesh.
void generate_valence(K_point_set const &ks__)
Generate valence charge density and magnetization from the wave functions.
void init_density_matrix_for_paw()
Initialize PAW density matrix.
void generate(K_point_set const &ks__, bool symmetrize__, bool add_core__, bool transform_to_rg__)
Generate full charge density (valence + core) and magnetization from the wave functions.
Four-component function consisting of scalar and vector parts.
Compact storage of non-zero Gaunt coefficients .
int num_gaunt(int lm3) const
Return number of non-zero Gaunt coefficients for a given lm3.
gaunt_L1_L2< T > const & gaunt(int lm3, int idx) const
Return a structure containing {lm1, lm2, coef} for a given lm3 and index.
Describes Hubbard orbital occupancy or potential correction matrices.
int num_kpoints() const
Return total number of k-points.
K-point related variables and methods.
double band_occupancy(int j__, int ispn__) const
Get band occupancy.
double weight() const
Return weight of k-point.
int num_occupied_bands(int ispn__=-1) const
Get the number of occupied bands for each spin channel.
Representation of the periodical function on the muffin-tin geometry.
static void convert(int lmax__, double const *f_rlm__, std::complex< double > *f_ylm__)
Convert form Rlm to Ylm representation.
static double gaunt_rrr(int l1, int l2, int l3, int m1, int m2, int m3)
Gaunt coefficent of three real spherical harmonics.
Simulation context is a set of parameters and objects describing a single simulation.
auto gvec_phase_factor(r3::vector< int > G__, int ia__) const
Phase factors .
auto const & gvec() const
Return const reference to Gvec object.
auto const & augmentation_op(int iat__) const
Returns a constant pointer to the augmentation operator of a given atom type.
std::ostream & out() const
Return output stream.
mpi::Communicator const & comm() const
Total communicator of the simulation.
int num_spins() const
Number of spin components.
bool gamma_point(bool gamma_point__)
Set flag for Gamma-point calculation.
int num_spinors() const
Number of spinor wave-functions labeled by a sinlge band index.
int num_mag_comp() const
Number of components in the complex density matrix.
int num_bands(int num_bands__)
Set the number of bands.
int num_mag_dims() const
Number of dimensions in the magnetization vector.
double pw_cutoff() const
Plane-wave cutoff for G-vectors (in 1/[a.u.]).
bool use_symmetry() const
Get a use_symmetry flag.
double omega() const
Unit cell volume.
int max_num_mt_points() const
Maximum number of muffin-tin points among all atom types.
Atom const & atom(int id__) const
Return const atom instance by id.
int num_paw_atoms() const
Return number of atoms with PAW pseudopotential.
int num_atom_symmetry_classes() const
Number of atom symmetry classes.
bool augment() const
Return 'True' if at least one atom in the unit cell has an augmentation charge.
auto const & spl_num_paw_atoms() const
Get split index of PAW atoms.
int num_atom_types() const
Number of atom types.
int max_mt_basis_size() const
Maximum number of basis functions among all atom types.
Atom_type & atom_type(int id__)
Return atom type instance by id.
int max_mt_radial_basis_size() const
Maximum number of radial functions among all atom types.
int num_atoms() const
Number of atoms in the unit cell.
std::vector< double > find_mt_radii(int auto_rmt__, bool inflate__)
Automatically determine new muffin-tin radii as a half distance between neighbor atoms.
double num_valence_electrons() const
Number of valence electrons.
double num_electrons() const
Total number of electrons (core + valence).
Atom_symmetry_class const & atom_symmetry_class(int id__) const
Return const symmetry class instance by class id.
auto paw_atom_index(paw_atom_index_t::global ipaw__) const
Return global index of atom by the index in the list of PAW atoms.
Helper class to wrap stream id (integer number).
auto use_hartree() const
Use Hartree potential in the inner() product for residuals.
void gemm(char transa, char transb, ftn_int m, ftn_int n, ftn_int k, T const *alpha, T const *A, ftn_int lda, T const *B, ftn_int ldb, T const *beta, T *C, ftn_int ldc, acc::stream_id sid=acc::stream_id(-1)) const
General matrix-matrix multiplication.
MPI communicator wrapper.
void allreduce(T *buffer__, int count__) const
Perform the in-place (the output buffer is used as the input buffer) all-to-all reduction.
int rank() const
Rank of MPI process inside communicator.
Multidimensional array with the column-major (Fortran) order.
uint64_t hash(uint64_t h__=5381) const
Compute hash of the array.
void copy_to(memory_t mem__, size_t idx0__, size_t n__, acc::stream_id sid=acc::stream_id(-1))
Copy n elements starting from idx0 from one memory type to another.
void zero(memory_t mem__, size_t idx0__, size_t n__)
Zero n elements starting from idx0.
uint32_t ld() const
Return leading dimension size.
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
T checksum(size_t idx0__, size_t size__) const
Compute checksum.
size_t size() const
Return total size (number of elements) of the array.
Wave-fucntions in the FFT-friendly distribution.
Describe a range of bands.
Contains definition and partial implementation of sirius::Density class.
Generate complex spherical harmonics for the local set of G-vectors.
Generate spherical Bessel functions at the muffin-tin boundary for the local set of G-vectors.
Contains the mixer facttory for creating different types of mixers.
Contains declarations of functions required for mixing.
@ value
the parser finished reading a JSON value
void copyin(T *target__, T const *source__, size_t n__)
Copy memory from host to device.
void sync_stream(stream_id sid__)
Synchronize a single stream.
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
size_t spfft_grid_size(T const &spfft__)
Total size of the SpFFT transformation grid.
FunctionProperties< Periodic_function< double > > periodic_function_property_modified(bool use_coarse_gvec__)
Only for the PP-PW case.
int lmax(int lmmax__)
Get maximum orbital quantum number by the maximum lm index.
int lmmax(int lmax)
Maximum number of combinations for a given .
int lm(int l, int m)
Get composite lm index by angular index l and azimuthal index m.
std::vector< int > l_by_lm(int lmax__)
Get array of orbital quantum numbers for each lm component.
Namespace of the SIRIUS library.
static void add_k_point_contribution_rg_noncollinear(fft::spfft_transform_type< T > &fft__, T w__, T const *inp_wf_up__, T const *inp_wf_dn__, int nr__, sddk::mdarray< std::complex< T >, 1 > &psi_r_up__, sddk::mdarray< T, 2 > &density_rg__)
Compute contribution to density and megnetisation from the 2-component spinor wave-functions.
auto split_in_blocks(int length__, int block_size__)
Split the 'length' elements into blocks with the initial block size.
@ pseudopotential
Pseudopotential (ultrasoft, norm-conserving, PAW).
auto hash(void const *buff, size_t size, uint64_t h=5381)
Simple hash function.
const double y00
First spherical harmonic .
strong_type< int, struct __block_id_tag > block_id
ID of the block.
strong_type< int, struct __rf_index_tag > rf_index
Radial function index.
static void add_k_point_contribution_rg_collinear(fft::spfft_transform_type< T > &fft__, int ispn__, T w__, T const *inp_wf__, int nr__, bool gamma__, sddk::mdarray< T, 2 > &density_rg__)
Compute non-magnetic or up- or dn- contribution of the wave-functions to the charge density.
strong_type< int, struct __n_blocks_tag > n_blocks
Number of blocks to which the global index is split.
auto generate_sbessel_mt(Simulation_context const &ctx__, int lmax__)
Compute values of spherical Bessel functions at MT boundary.
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
auto sum_fg_fl_yg(Simulation_context const &ctx__, int lmax__, std::complex< double > const *fpw__, sddk::mdarray< double, 3 > &fl__, sddk::matrix< std::complex< double > > &gvec_ylm__)
void symmetrize_density_matrix(Unit_cell const &uc__, density_matrix_t &dm__, int num_mag_comp__)
Symmetrize density matrix.
auto generate_gvec_ylm(Simulation_context const &ctx__, int lmax__)
Generate complex spherical harmonics for the local set of G-vectors.
int packed_index(int i__, int j__)
Pack two indices into one for symmetric matrices.
Serialize madarray to json.
static const int ia
Global index of atom.
static const int nbf
Number of beta-projector functions for this atom.
static const int offset
Offset of beta-projectors in this chunk.
static const unsigned int fft_layout
Shuffle to FFT distribution.
LAPW specific function to compute sum over plane-wave coefficients and spherical harmonics.
Symmetrize density matrix of LAPW and PW methods.
Symmetrize density and potential fields (scalar + vector).
Symmetrize occupation matrix of the LDA+U method.