25#include <gsl/gsl_sf_bessel.h>
41spfft::Transform& Simulation_context::spfft<double>()
47spfft::Transform
const& Simulation_context::spfft<double>()
const
53spfft::Transform& Simulation_context::spfft_coarse<double>()
59spfft::Transform
const& Simulation_context::spfft_coarse<double>()
const
65spfft::Grid& Simulation_context::spfft_grid_coarse<double>()
67 return *spfft_grid_coarse_;
70#if defined(SIRIUS_USE_FP32)
72spfft::TransformFloat& Simulation_context::spfft<float>()
74 return *spfft_transform_float_;
78spfft::TransformFloat
const& Simulation_context::spfft<float>()
const
80 return *spfft_transform_float_;
84spfft::TransformFloat& Simulation_context::spfft_coarse<float>()
86 return *spfft_transform_coarse_float_;
90spfft::TransformFloat
const& Simulation_context::spfft_coarse<float>()
const
92 return *spfft_transform_coarse_float_;
96spfft::GridFloat& Simulation_context::spfft_grid_coarse<float>()
98 return *spfft_grid_coarse_float_;
105 if (!(cfg().control().fft_mode() ==
"serial" || cfg().control().fft_mode() ==
"parallel")) {
106 RTE_THROW(
"wrong FFT mode");
109 auto rlv = unit_cell().reciprocal_lattice_vectors();
112 auto fft_grid = cfg().settings().fft_grid_size();
113 if (fft_grid[0] * fft_grid[1] * fft_grid[2] == 0) {
115 cfg().settings().fft_grid_size(
fft_grid_);
132 double upper_bound{0};
133 double charge = unit_cell().num_electrons();
139 charge * charge * std::sqrt(2.0 * lambda /
twopi) * std::erfc(gmax * std::sqrt(1.0 / (4.0 * lambda)));
140 }
while (upper_bound < 1e-8);
142 if (lambda < 1.5 &&
comm().rank() == 0) {
144 s <<
"ewald_lambda(): pw_cutoff is too small";
153 PROFILE(
"sirius::Simulation_context::initialize");
157 RTE_THROW(
"Simulation parameters are already initialized.");
160 auto verb_lvl = env::get_value_ptr<int>(
"SIRIUS_VERBOSITY");
162 this->verbosity(*verb_lvl);
166 if (this->
comm().rank() == 0 && this->verbosity() >= 1) {
167 auto out_str =
split(cfg().control().output(),
':');
168 if (out_str.size() != 2) {
169 RTE_THROW(
"wrong output stream parameter");
171 if (out_str[0] ==
"stdout") {
172 output_stream_ = &std::cout;
173 }
else if (out_str[0] ==
"file") {
174 output_file_stream_ = std::ofstream(out_str[1]);
175 output_stream_ = &output_file_stream_;
177 RTE_THROW(
"unknown output stream type");
180 output_stream_ = &null_stream();
183 electronic_structure_method(cfg().parameters().electronic_structure_method());
188 if (full_potential()) {
198 processing_unit(cfg().control().processing_unit());
201 if (processing_unit() == sddk::device_t::GPU) {
202#if !defined(SIRIUS_GPU)
203 RTE_THROW(
"not compiled with GPU support!");
210 switch (processing_unit()) {
211 case sddk::device_t::CPU: {
215 case sddk::device_t::GPU: {
221 if (processing_unit() == sddk::device_t::GPU) {
222 spla_ctx_.reset(
new spla::Context{SPLA_PU_GPU});
229 if (full_potential()) {
230 cfg().control().reduce_gvec(
false);
233 if (!cfg().iterative_solver().type().size() || (cfg().iterative_solver().type() ==
"auto")) {
234 if (full_potential()) {
235 cfg().iterative_solver().type(
"exact");
237 cfg().iterative_solver().type(
"davidson");
246 unit_cell().initialize();
256 auto lat_sym = find_lat_sym(lv, cfg().control().spglib_tolerance());
258 #pragma omp parallel for
259 for (
int i = 0; i < unit_cell().symmetry().size(); i++) {
260 auto& spgR = unit_cell().symmetry()[i].spg_op.R;
262 for (
size_t j = 0; j < lat_sym.size(); j++) {
263 auto latR = lat_sym[j];
265 for (
int x : {0, 1, 2}) {
266 for (
int y : {0, 1, 2}) {
267 found = found && (spgR(x, y) == latR(x, y));
275 RTE_THROW(
"spglib lattice symmetry was not found in the list of SIRIUS generated symmetries");
281 for (
int iat = 0; iat < unit_cell().num_atom_types(); iat++) {
282 if (unit_cell().atom_type(iat).spin_orbit_coupling()) {
283 this->so_correction(
true);
288 if (full_potential()) {
290 if (aw_cutoff() > 0) {
291 gk_cutoff(aw_cutoff() / unit_cell().min_mt_radius());
306 s <<
"G+k cutoff is too large for a given plane-wave cutoff" << std::endl
307 <<
" pw cutoff : " <<
pw_cutoff() << std::endl
308 <<
" doubled G+k cutoff : " <<
gk_cutoff() * 2;
312 if (full_potential() && (this->
gk_cutoff() * this->unit_cell().max_mt_radius() > this->unit_cell().lmax_apw()) &&
313 this->
comm().rank() == 0 && this->verbosity() >= 0) {
315 s <<
"G+k cutoff (" << this->
gk_cutoff() <<
") is too large for a given lmax ("
316 << this->unit_cell().lmax_apw() <<
") and a maximum MT radius (" << this->unit_cell().max_mt_radius() <<
")"
318 <<
"suggested minimum value for lmax : " << int(this->
gk_cutoff() * this->unit_cell().max_mt_radius()) + 1;
322 if (!full_potential()) {
331 int nbnd =
static_cast<int>(unit_cell().num_valence_electrons() / 2.0) +
332 std::max(10,
static_cast<int>(0.1 * unit_cell().num_valence_electrons()));
333 if (full_potential()) {
338 if (
num_fv_states() <
static_cast<int>(unit_cell().num_valence_electrons() / 2.0)) {
340 s <<
"not enough first-variational states : " <<
num_fv_states();
354#if defined(SIRIUS_CUDA)
359#if defined(SIRIUS_MAGMA)
362 bool is_magma{
false};
364#if defined(SIRIUS_SCALAPACK)
365 bool is_scalapack{
true};
367 bool is_scalapack{
false};
369#if defined(SIRIUS_ELPA)
374#if defined(SIRIUS_DLAF)
385 int npr = cfg().control().mpi_grid_dims()[0];
386 int npc = cfg().control().mpi_grid_dims()[1];
389 for (
int i : {0, 1}) {
390 if (evsn[i] ==
"auto") {
392 if (
comm_band().size() == 1 || npc == 1 || npr == 1 || !is_scalapack) {
393 if (full_potential()) {
396 }
else if (is_cuda) {
397 evsn[i] =
"cusolver";
403 evsn[i] =
"cusolver";
404 }
else if (is_magma &&
num_bands() > 200) {
412 evsn[i] =
"scalapack";
425 auto ev_str = env::get_ev_solver();
437 auto& std_solver = std_evp_solver();
438 auto& gen_solver = gen_evp_solver();
440 if (std_solver.is_parallel() != gen_solver.is_parallel()) {
441 RTE_THROW(
"both solvers must be sequential or parallel");
445 if (std_solver.is_parallel()) {
448 blacs_grid_ = std::make_unique<la::BLACS_grid>(mpi::Communicator::self(), 1, 1);
452 if (cyclic_block_size() < 0) {
456 cfg().control().cyclic_block_size(2);
458 cfg().control().cyclic_block_size(
459 static_cast<int>(std::min(128.0, std::pow(2.0,
static_cast<int>(a))) + 1e-12));
466 if (this->hubbard_correction()) {
469 if (this->so_correction() || this->
num_mag_dims() == 3) {
470 this->cfg().hubbard().simplified(
false);
474 if (cfg().parameters().precision_wf() ==
"fp32" && cfg().parameters().precision_gs() ==
"fp64") {
475 double t = std::numeric_limits<float>::epsilon() * 10;
476 auto tol = std::max(cfg().settings().itsol_tol_min(), t);
477 cfg().settings().itsol_tol_min(tol);
482 smearing(cfg().parameters().smearing());
487 ::sirius::print_memory_usage(this->
out(), FILE_LINE);
489 if (verbosity() >= 1 &&
comm().rank() == 0) {
490 print_info(this->
out());
493 auto print_mpi_layout = env::print_mpi_layout();
495 if (verbosity() >= 3 || print_mpi_layout) {
497 if (
comm().rank() == 0) {
498 pout <<
"MPI rank placement" << std::endl;
499 pout <<
hbar(136,
'-') << std::endl;
500 pout <<
" | comm tot, band, k | comm fft, ortho | mpi_grid tot, row, col | blacs tot, row, col" << std::endl;
502 pout << std::setw(12) <<
hostname() <<
" | "
505 << std::setw(6) <<
comm_k().rank() <<
" | "
508 << std::setw(6) <<
mpi_grid_->communicator(3).rank()
509 << std::setw(6) <<
mpi_grid_->communicator(1 << 0).rank()
510 << std::setw(6) <<
mpi_grid_->communicator(1 << 1).rank() <<
" | "
511 << std::setw(6) << blacs_grid().comm().rank()
512 << std::setw(6) << blacs_grid().comm_row().rank()
513 << std::setw(6) << blacs_grid().comm_col().rank() << std::endl;
522Simulation_context::print_info(std::ostream& out__)
const
528 strftime(buf,
sizeof(buf),
"%a, %e %b %Y %H:%M:%S", ptm);
530 os <<
"SIRIUS version : " << sirius::major_version() <<
"." << sirius::minor_version()
531 <<
"." << sirius::revision() << std::endl
532 <<
"git hash : " << sirius::git_hash() << std::endl
533 <<
"git branch : " << sirius::git_branchname() << std::endl
534 <<
"build time : " << sirius::build_date() << std::endl
535 <<
"start time : " << std::string(buf) << std::endl
537 <<
"number of MPI ranks : " << this->
comm().
size() << std::endl;
540 for (
int i : {0, 1}) {
541 os <<
" " <<
mpi_grid_->communicator(1 << i).size();
545 os <<
"maximum number of OMP threads : " << omp_get_max_threads() << std::endl
547 <<
"page size (Kb) : " << (get_page_size() >> 10) << std::endl
548 <<
"number of pages : " << get_num_pages() << std::endl
549 <<
"available memory (GB) : " << (get_total_memory() >> 30) << std::endl;
553 rte::ostream os(out__,
"fft");
554 std::string headers[] = {
"FFT context for density and potential",
"FFT context for coarse grid"};
558 fft::Gvec
const* gvecs[] = {&
gvec(), &gvec_coarse()};
560 for (
int i = 0; i < 2; i++) {
561 os << headers[i] << std::endl
562 << hbar(37,
'=') << std::endl
563 <<
" comm size : " << comms[i]->size() << std::endl
564 <<
" plane wave cutoff : " << cutoffs[i] << std::endl
565 <<
" grid size : " << fft_grids[i][0] <<
" "
566 << fft_grids[i][1] <<
" "
567 << fft_grids[i][2] <<
" total : "
568 << fft_grids[i].num_points() << std::endl
569 <<
" grid limits : " << fft_grids[i].limits(0).first <<
" "
570 << fft_grids[i].limits(0).second <<
" "
571 << fft_grids[i].limits(1).first <<
" "
572 << fft_grids[i].limits(1).second <<
" "
573 << fft_grids[i].limits(2).first <<
" "
574 << fft_grids[i].limits(2).second << std::endl
575 <<
" number of G-vectors within the cutoff : " << gvecs[i]->num_gvec() << std::endl
576 <<
" local number of G-vectors : " << gvecs[i]->count() << std::endl
577 <<
" number of G-shells : " << gvecs[i]->num_shells() << std::endl
583 rte::ostream os(out__,
"unit cell");
584 unit_cell().print_info(os, verbosity());
587 rte::ostream os(out__,
"sym");
588 unit_cell().symmetry().print_info(os, verbosity());
591 rte::ostream os(out__,
"atom type");
592 for (
int i = 0; i < unit_cell().num_atom_types(); i++) {
593 unit_cell().atom_type(i).print_info(os);
596 if (this->cfg().control().print_neighbors()) {
597 rte::ostream os(out__,
"nghbr");
598 unit_cell().print_nearest_neighbours(os);
602 rte::ostream os(out__,
"info");
603 os <<
"total nuclear charge : " << unit_cell().total_nuclear_charge() << std::endl
604 <<
"number of core electrons : " << unit_cell().num_core_electrons() << std::endl
605 <<
"number of valence electrons : " << unit_cell().num_valence_electrons() << std::endl
606 <<
"total number of electrons : " << unit_cell().num_electrons() << std::endl
607 <<
"extra charge : " << cfg().parameters().extra_charge() << std::endl
608 <<
"total number of aw basis functions : " << unit_cell().mt_aw_basis_size() << std::endl
609 <<
"total number of lo basis functions : " << unit_cell().mt_lo_basis_size() << std::endl
610 <<
"number of first-variational states : " <<
num_fv_states() << std::endl
611 <<
"number of bands : " <<
num_bands() << std::endl
612 <<
"number of spins : " <<
num_spins() << std::endl
613 <<
"number of magnetic dimensions : " <<
num_mag_dims() << std::endl
615 <<
"number of spinors per band index : " <<
num_spinors() << std::endl
616 <<
"lmax_apw : " << unit_cell().lmax_apw() << std::endl
617 <<
"lmax_rho : " << lmax_rho() << std::endl
618 <<
"lmax_pot : " << lmax_pot() << std::endl
619 <<
"lmax_rf : " << unit_cell().lmax() << std::endl
620 <<
"smearing type : " << cfg().parameters().smearing().c_str() << std::endl
621 <<
"smearing width : " << smearing_width() << std::endl
622 <<
"cyclic block size : " << cyclic_block_size() << std::endl
623 <<
"|G+k| cutoff : " <<
gk_cutoff() << std::endl
624 <<
"symmetry : " << std::boolalpha <<
use_symmetry() << std::endl
625 <<
"so_correction : " << std::boolalpha << so_correction() << std::endl;
627 std::string reln[] = {
"valence relativity : ",
"core relativity : "};
629 std::map<relativity_t, std::string>
const relm = {
630 {relativity_t::none,
"none"},
631 {relativity_t::koelling_harmon,
"Koelling-Harmon"},
632 {relativity_t::zora,
"zora"},
633 {relativity_t::iora,
"iora"},
634 {relativity_t::dirac,
"Dirac"}
636 for (
int i = 0; i < 2; i++) {
637 os << reln[i] << relm.at(relt[i]) << std::endl;
640 std::string evsn[] = {
"standard eigen-value solver : ",
"generalized eigen-value solver : "};
641 la::ev_solver_t evst[] = {std_evp_solver().type(), gen_evp_solver().type()};
642 std::map<la::ev_solver_t, std::string>
const evsm = {
651 for (
int i = 0; i < 2; i++) {
652 os << evsn[i] << evsm.at(evst[i]) << std::endl;
654 os <<
"processing unit : ";
655 switch (processing_unit()) {
656 case sddk::device_t::CPU: {
657 os <<
"CPU" << std::endl;
660 case sddk::device_t::GPU: {
661 os <<
"GPU" << std::endl;
663 acc::print_device_info(0, os);
668 <<
"iterative solver : " << cfg().iterative_solver().type() << std::endl
669 <<
"number of steps : " << cfg().iterative_solver().num_steps() << std::endl
670 <<
"subspace size : " << cfg().iterative_solver().subspace_size() << std::endl
671 <<
"early restart ratio : " << cfg().iterative_solver().early_restart() << std::endl
672 <<
"precision_wf : " << cfg().parameters().precision_wf() << std::endl
673 <<
"precision_hs : " << cfg().parameters().precision_hs() << std::endl
674 <<
"mixer : " << cfg().mixer().type() << std::endl
675 <<
"mixing beta : " << cfg().mixer().beta() << std::endl
676 <<
"max_history : " << cfg().mixer().max_history() << std::endl
677 <<
"use_hartree : " << std::boolalpha << cfg().mixer().use_hartree() << std::endl
679 <<
"spglib version: " << spg_get_major_version() <<
"." << spg_get_minor_version() <<
"."
680 << spg_get_micro_version() << std::endl;
683 rte::ostream os(out__,
"info");
684 unsigned int vmajor, vminor, vmicro;
685 H5get_libversion(&vmajor, &vminor, &vmicro);
686 os <<
"HDF5 version: " << vmajor <<
"." << vminor <<
"." << vmicro << std::endl;
689 rte::ostream os(out__,
"info");
690 int vmajor, vminor, vmicro;
691 xc_version(&vmajor, &vminor, &vmicro);
692 os <<
"Libxc version: " << vmajor <<
"." << vminor <<
"." << vmicro << std::endl;
695 rte::ostream os(out__,
"info");
697 os << std::endl <<
"XC functionals" << std::endl
698 << hbar(14,
'=') << std::endl;
699 for (
auto& xc_label : xc_functionals()) {
700 XC_functional xc(spfft<double>(), unit_cell().lattice_vectors(), xc_label,
num_spins());
701#if defined(SIRIUS_USE_VDWXC)
703 os <<
"Van der Walls functional" << std::endl
704 << xc.refs() << std::endl;
708 os << i <<
") " << xc_label <<
" : " << xc.name() << std::endl
709 << xc.refs() << std::endl;
714 if (!full_potential()) {
715 rte::ostream os(out__,
"info");
717 <<
"memory consumption" << std::endl
718 << hbar(18,
'=') << std::endl;
720 double v0 = std::pow(
twopi, 3) / unit_cell().omega();
728 auto ngk =
static_cast<size_t>(v1 / v0);
733 auto ng =
static_cast<size_t>(v2 / v0);
734 if (cfg().control().reduce_gvec()) {
738 auto ngc =
static_cast<size_t>(v3 / v0);
739 if (cfg().control().reduce_gvec()) {
742 os <<
"approximate number of G+k vectors : " << ngk << std::endl
743 <<
"approximate number of G vectors : " << ng << std::endl
744 <<
"approximate number of coarse G vectors : " << ngc << std::endl;
746 os <<
"approximate size of wave-functions for each k-point: " <<
static_cast<int>(wf_size >> 20) <<
" Mb, "
747 <<
static_cast<int>((wf_size /
comm_band().size()) >> 20) <<
" Mb/rank" << std::endl;
752 int num_phi = cfg().iterative_solver().subspace_size() *
num_bands();
766 ngk *
sizeof(std::complex<double>);
767 os <<
"approximate memory consumption of Davidson solver: "
768 <<
static_cast<int>((tot_size /
comm_band().size()) >> 20) <<
" Mb/rank" << std::endl;
770 if (unit_cell().augment()) {
772 size_t ngloc = std::max(
static_cast<size_t>(1), ng /
comm().size());
774 int nb = unit_cell().max_mt_basis_size() * (unit_cell().max_mt_basis_size() + 1) / 2;
780 size_t size_aug = nb * ngloc *
sizeof(std::complex<double>);
781 if (unit_cell().num_atom_types() > 1) {
786 size_t size1 = nb * ngloc *
sizeof(std::complex<double>);
787 size1 = std::min(size1,
static_cast<size_t>(1 << 30));
790 for (
int iat = 0; iat < unit_cell().num_atom_types(); iat++) {
791 max_atoms = std::max(max_atoms, unit_cell().atom_type(iat).num_atoms());
793 size_t size2 = max_atoms * ngloc *
sizeof(std::complex<double>);
794 size2 = std::min(size2,
static_cast<size_t>(1 << 30));
796 size_aug += (size1 + size2);
797 os <<
"approximate memory consumption of charge density augmentation: "
798 <<
static_cast<int>(size_aug >> 20) <<
" Mb/rank" << std::endl;
801 size_t size_fft = spfft<double>().local_slice_size() + spfft_coarse<double>().local_slice_size();
802 size_fft *=
sizeof(double);
806 os <<
"approximate memory consumption of FFT transforms: "
807 <<
static_cast<int>(size_fft >> 20) <<
" Mb/rank" << std::endl;
819 PROFILE(
"sirius::Simulation_context::update");
822 unit_cell().update();
825 auto rlv = unit_cell().reciprocal_lattice_vectors();
827 auto spfft_pu = this->processing_unit() == sddk::device_t::CPU ? SPFFT_PU_HOST : SPFFT_PU_GPU;
834 cfg().control().spglib_tolerance());
837 comm_ortho_fft_coarse());
844 spl_z.local_size(), spfft_pu, -1,
comm_fft_coarse().native(), SPFFT_EXCH_DEFAULT);
845#ifdef SIRIUS_USE_FP32
847 fft_coarse_grid_[2], gvec_coarse_fft_->zcol_count(), spl_z.local_size(), spfft_pu, -1,
851 const auto fft_type_coarse = gvec_coarse().reduced() ? SPFFT_TRANS_R2C : SPFFT_TRANS_C2C;
853 auto const& gv = gvec_coarse_fft_->gvec_array();
858 spl_z.local_size(), gvec_coarse_fft_->count(), SPFFT_INDEX_TRIPLETS,
859 gv.at(sddk::memory_t::host))));
860#ifdef SIRIUS_USE_FP32
861 spfft_transform_coarse_float_.reset(
new spfft::TransformFloat(spfft_grid_coarse_float_->create_transform(
863 spl_z.local_size(), gvec_coarse_fft_->count(), SPFFT_INDEX_TRIPLETS,
864 gv.at(sddk::memory_t::host))));
873 gvec_fft_ = std::make_shared<fft::Gvec_fft>(*
gvec_,
comm_fft(), comm_ortho_fft());
878 spfft_grid_ = std::unique_ptr<spfft::Grid>(
880 gvec_fft_->zcol_count(), spl_z.local_size(), spfft_pu, -1,
881 comm_fft().native(), SPFFT_EXCH_DEFAULT));
882#if defined(SIRIUS_USE_FP32)
883 spfft_grid_float_ = std::unique_ptr<spfft::GridFloat>(
885 spl_z.local_size(), spfft_pu, -1,
comm_fft().native(), SPFFT_EXCH_DEFAULT));
887 const auto fft_type =
gvec().reduced() ? SPFFT_TRANS_R2C : SPFFT_TRANS_C2C;
889 auto const& gv = gvec_fft_->gvec_array();
893 spl_z.local_size(), gvec_fft_->count(), SPFFT_INDEX_TRIPLETS, gv.at(sddk::memory_t::host))));
894#if defined(SIRIUS_USE_FP32)
895 spfft_transform_float_.reset(
new spfft::TransformFloat(spfft_grid_float_->create_transform(
897 gvec_fft_->count(), SPFFT_INDEX_TRIPLETS, gv.at(sddk::memory_t::host))));
902 switch (this->processing_unit()) {
903 case sddk::device_t::CPU: {
906 case sddk::device_t::GPU: {
908 #pragma omp parallel for schedule(static)
909 for (
int igloc = 0; igloc <
gvec().count(); igloc++) {
911 for (
int x : {0, 1, 2}) {
920 gvec_->lattice_vectors(rlv);
926 remap_gvec_ = std::make_unique<fft::Gvec_shells>(
gvec());
929 if (unit_cell().num_atoms() != 0 &&
use_symmetry() && cfg().control().verification() >= 1) {
930 check_gvec(
gvec(), unit_cell().symmetry());
931 if (!full_potential()) {
932 check_gvec(gvec_coarse(), unit_cell().symmetry());
934 check_gvec(*remap_gvec_, unit_cell().symmetry());
938 if (cfg().control().verification() >= 0) {
939 #pragma omp parallel for
940 for (
int igloc = 0; igloc <
gvec().count(); igloc++) {
941 int ig =
gvec().offset() + igloc;
945 for (
int x : {0, 1, 2}) {
946 auto limits = fft_grid().limits(x);
948 if (gv[x] < limits.first || gv[x] > limits.second) {
950 s <<
"G-vector is outside of grid limits\n"
952 <<
" FFT grid limits: " << fft_grid().limits(0).first <<
" " << fft_grid().limits(0).second
953 <<
" " << fft_grid().limits(1).first <<
" " << fft_grid().limits(1).second <<
" "
954 << fft_grid().limits(2).first <<
" " << fft_grid().limits(2).second <<
"\n"
955 <<
" FFT grid is not compatible with G-vector cutoff (" << this->
pw_cutoff() <<
")";
962 if (unit_cell().num_atoms()) {
966 std::pair<int, int> limits(0, 0);
967 for (
int x : {0, 1, 2}) {
968 limits.first = std::min(limits.first, fft_grid().limits(x).first);
969 limits.second = std::max(limits.second, fft_grid().limits(x).second);
974 #pragma omp parallel for
975 for (
int i = limits.first; i <= limits.second; i++) {
976 for (
int ia = 0; ia < unit_cell().num_atoms(); ia++) {
977 auto pos = unit_cell().atom(ia).position();
978 for (
int x : {0, 1, 2}) {
986 #pragma omp parallel for schedule(static)
987 for (
int igloc = 0; igloc <
gvec().count(); igloc++) {
989 int ig =
gvec().offset() + igloc;
990 for (
int iat = 0; iat < unit_cell().num_atom_types(); iat++) {
991 std::complex<double> z(0, 0);
992 for (
int ia = 0; ia < unit_cell().atom_type(iat).num_atoms(); ia++) {
1002 #pragma omp parallel for
1003 for (
int i = limits.first; i <= limits.second; i++) {
1004 for (
int isym = 0; isym < unit_cell().symmetry().size(); isym++) {
1005 auto t = unit_cell().symmetry()[isym].spg_op.t;
1006 for (
int x : {0, 1, 2}) {
1013 switch (this->processing_unit()) {
1014 case sddk::device_t::CPU: {
1017 case sddk::device_t::GPU: {
1018 gvec_->gvec_tp().allocate(sddk::memory_t::device).copy_to(sddk::memory_t::device);
1024 if (!full_potential()) {
1026 double new_pw_cutoff{this->
pw_cutoff()};
1027 for (
int igloc = 0; igloc <
gvec().count(); igloc++) {
1028 new_pw_cutoff = std::max(new_pw_cutoff,
gvec().gvec_len<index_domain_t::local>(igloc));
1030 gvec().comm().allreduce<double, mpi::op_t::max>(&new_pw_cutoff, 1);
1032 double new_gk_cutoff = this->
gk_cutoff();
1033 if (new_pw_cutoff > this->
pw_cutoff()) {
1034 new_gk_cutoff += (new_pw_cutoff - this->
pw_cutoff());
1039 ri_.
aug_ = std::make_unique<Radial_integrals_aug<false>>(unit_cell(), new_pw_cutoff,
1044 ri_.
aug_djl_ = std::make_unique<Radial_integrals_aug<true>>(unit_cell(), new_pw_cutoff,
1049 ri_.
ps_core_ = std::make_unique<Radial_integrals_rho_core_pseudo<false>>(unit_cell(),
1050 new_pw_cutoff, cfg().settings().nprii_rho_core(),
cb_.
rhoc_ri_);
1055 std::make_unique<Radial_integrals_rho_core_pseudo<true>>(unit_cell(), new_pw_cutoff,
1060 ri_.
ps_rho_ = std::make_unique<Radial_integrals_rho_pseudo>(unit_cell(), new_pw_cutoff, 20,
1065 ri_.
vloc_ = std::make_unique<Radial_integrals_vloc<false>>(unit_cell(), new_pw_cutoff,
1070 ri_.
vloc_djl_ = std::make_unique<Radial_integrals_vloc<true>>(unit_cell(), new_pw_cutoff,
1076 ri_.
beta_ = std::make_unique<Radial_integrals_beta<false>>(unit_cell(), new_gk_cutoff,
1081 ri_.
beta_djl_ = std::make_unique<Radial_integrals_beta<true>>(unit_cell(), new_gk_cutoff,
1086 return unit_cell().atom_type(iat).indexr_wfs();
1090 return unit_cell().atom_type(iat).ps_atomic_wf(i).f;
1094 ri_.
ps_atomic_wf_ = std::make_unique<Radial_integrals_atomic_wf<false>>(unit_cell(), new_gk_cutoff, 20,
1099 ri_.
ps_atomic_wf_djl_ = std::make_unique<Radial_integrals_atomic_wf<true>>(unit_cell(), new_gk_cutoff,
1103 for (
int iat = 0; iat < unit_cell().num_atom_types(); iat++) {
1104 if (unit_cell().atom_type(iat).augment() && unit_cell().atom_type(iat).num_atoms() > 0) {
1105 augmentation_op_[iat] = std::make_unique<Augmentation_operator>(unit_cell().atom_type(iat),
gvec(),
1114 if (full_potential()) {
1119 auto save_config = env::save_config();
1120 if (save_config.size() && this->comm().rank() == 0) {
1122 if (save_config ==
"all") {
1123 static int count{0};
1124 std::stringstream s;
1125 s <<
"sirius" << std::setfill(
'0') << std::setw(6) << count <<
".json";
1131 std::ofstream fi(name, std::ofstream::out | std::ofstream::trunc);
1133 fi << conf_dict.dump(4);
1138Simulation_context::create_storage_file(std::string name__)
const
1142 HDF5_tree fout(name__, hdf5_access_t::truncate);
1143 fout.create_node(
"parameters");
1144 fout.create_node(
"effective_potential");
1145 fout.create_node(
"effective_magnetic_field");
1146 fout.create_node(
"density");
1147 fout.create_node(
"magnetization");
1150 fout[
"magnetization"].create_node(j);
1151 fout[
"effective_magnetic_field"].create_node(j);
1154 fout[
"parameters"].write(
"num_spins",
num_spins());
1155 fout[
"parameters"].write(
"num_mag_dims",
num_mag_dims());
1156 fout[
"parameters"].write(
"num_bands",
num_bands());
1158 sddk::mdarray<int, 2> gv(3,
gvec().num_gvec());
1159 for (
int ig = 0; ig <
gvec().num_gvec(); ig++) {
1161 for (
int x : {0, 1, 2}) {
1165 fout[
"parameters"].write(
"num_gvec",
gvec().num_gvec());
1166 fout[
"parameters"].write(
"gvec", gv);
1168 fout.create_node(
"unit_cell");
1169 fout[
"unit_cell"].create_node(
"atoms");
1170 for (
int j = 0; j < unit_cell().num_atoms(); j++) {
1171 fout[
"unit_cell"][
"atoms"].create_node(j);
1172 fout[
"unit_cell"][
"atoms"][j].write(
"mt_basis_size", unit_cell().atom(j).mt_basis_size());
1181 PROFILE(
"sirius::Simulation_context::generate_phase_factors");
1182 int na = unit_cell().atom_type(iat__).num_atoms();
1184 case sddk::device_t::CPU: {
1185 #pragma omp parallel for
1186 for (
int igloc = 0; igloc <
gvec().count(); igloc++) {
1187 const int ig =
gvec().offset() + igloc;
1188 for (
int i = 0; i < na; i++) {
1189 int ia = unit_cell().atom_type(iat__).atom_id(i);
1195 case sddk::device_t::GPU: {
1196#if defined(SIRIUS_GPU)
1197 generate_phase_factors_gpu(
gvec().count(), na, gvec_coord().at(sddk::memory_t::device),
1198 unit_cell().atom_coord(iat__).at(sddk::memory_t::device),
1199 phase_factors__.at(sddk::memory_t::device));
1209 PROFILE(
"sirius::Simulation_context::init_atoms_to_grid_idx");
1211 auto Rmt = unit_cell().find_mt_radii(1,
true);
1214 for (
auto e : Rmt) {
1222 r3::vector<double> delta(1.0 / spfft<double>().dim_x(), 1.0 / spfft<double>().dim_y(), 1.0 / spfft<double>().dim_z());
1224 const int z_off = spfft<double>().local_z_offset();
1226 r3::vector<int> grid_end(spfft<double>().dim_x(), spfft<double>().dim_y(), z_off + spfft<double>().local_z_length());
1227 std::vector<r3::vector<double>> verts_cart{{-R, -R, -R}, {R, -R, -R}, {-R, R, -R}, {R, R, -R},
1228 {-R, -R, R}, {R, -R, R}, {-R, R, R}, {R, R, R}};
1231 std::vector<r3::vector<double>> verts;
1234 for (
auto v : verts_cart) {
1235 verts.push_back(pos + unit_cell().get_fractional_coordinates(v));
1240 for (
int x : {0, 1, 2}) {
1241 std::sort(verts.begin(), verts.end(),
1243 bounds_ind.first[x] = std::max(
static_cast<int>(verts[0][x] / delta[x]) - 1, grid_beg[x]);
1244 bounds_ind.second[x] = std::min(
static_cast<int>(verts[5][x] / delta[x]) + 1, grid_end[x]);
1250 #pragma omp parallel for
1251 for (
int ia = 0; ia < unit_cell().num_atoms(); ia++) {
1253 std::vector<std::pair<int, double>> atom_to_ind_map;
1255 for (
int t0 = -1; t0 <= 1; t0++) {
1256 for (
int t1 = -1; t1 <= 1; t1++) {
1257 for (
int t2 = -1; t2 <= 1; t2++) {
1261 auto box = bounds_box(pos);
1263 for (
int j0 = box.first[0]; j0 < box.second[0]; j0++) {
1264 for (
int j1 = box.first[1]; j1 < box.second[1]; j1++) {
1265 for (
int j2 = box.first[2]; j2 < box.second[2]; j2++) {
1267 auto r = unit_cell().get_cartesian_coordinates(v).length();
1268 if (r < Rmt[unit_cell().atom(ia).type_id()]) {
1270 atom_to_ind_map.push_back({ir, r});
1286 PROFILE(
"sirius::Simulation_context::init_comm");
1293 RTE_THROW(
"wrong MPI grid");
1296 const int npr = cfg().control().mpi_grid_dims()[0];
1297 const int npc = cfg().control().mpi_grid_dims()[1];
1298 const int npb = npr * npc;
1300 std::stringstream s;
1301 s <<
"wrong mpi grid dimensions : " << npr <<
" " << npc;
1306 std::stringstream s;
1307 s <<
"Can't divide " <<
comm_.
size() <<
" ranks into groups of size " << npb;
1312 if (comm_k_.is_null() && comm_band_.is_null()) {
1318 mpi_grid_ = std::make_unique<mpi::Grid>(std::vector<int>({npr, npc}), comm_band_);
1324 cfg().control().fft_mode(
"parallel");
Interface to the HDF5 library.
std::unique_ptr< spfft::Transform > spfft_transform_
Fine-grained FFT for density and potential.
auto gvec_phase_factor(r3::vector< int > G__, int ia__) const
Phase factors .
std::vector< std::vector< std::pair< int, double > > > atoms_to_grid_idx_
List of real-space point indices for each of the atoms.
sddk::mdarray< int, 2 > gvec_coord_
Lattice coordinats of G-vectors in a GPU-friendly ordering.
mpi::Communicator const & comm_
Communicator for this simulation.
nlohmann::json serialize()
Export parameters of simulation context as a JSON dictionary.
void init_atoms_to_grid_idx(double R__)
Find a list of real-space grid points around each atom.
mpi::Communicator comm_ortho_fft_coarse_
Auxiliary communicator for the coarse-grid FFT transformation.
timeval start_time_
Creation time of the parameters.
auto const & gvec() const
Return const reference to Gvec object.
auto const & comm_band() const
Band parallelization communicator.
double omega0_
Volume of the initial unit cell.
auto const & comm_fft_coarse() const
Communicator of the coarse FFT grid.
sddk::mdarray< std::complex< double >, 3 > sym_phase_factors_
1D phase factors of the symmetry operations.
void update()
Update context after setting new lattice vectors or atomic coordinates.
std::shared_ptr< fft::Gvec > gvec_coarse_
G-vectors within the 2 * |Gmax^{WF}| cutoff.
void init_comm()
Initialize communicators.
std::shared_ptr< fft::Gvec > gvec_
G-vectors within the Gmax cutoff.
std::vector< std::unique_ptr< Augmentation_operator > > augmentation_op_
Augmentation operator for each atom type.
void init_fft_grid()
Initialize FFT coarse and fine grids.
callback_functions_t cb_
Stores all callback functions.
void initialize()
Initialize the similation (can only be called once).
sddk::mdarray< std::complex< double >, 3 > phase_factors_
1D phase factors for each atom coordinate and G-vector index.
std::unique_ptr< la::BLACS_grid > blacs_grid_
2D BLACS grid for distributed linear algebra operations.
auto const & comm_fft() const
Communicator of the dense FFT grid.
auto const & comm_k() const
Communicator between k-points.
std::shared_ptr<::spla::Context > spla_ctx_
SPLA library context.
void generate_phase_factors(int iat__, sddk::mdarray< std::complex< double >, 2 > &phase_factors__) const
Generate phase factors for all atoms of a given type.
std::unique_ptr< la::Eigensolver > std_evp_solver_
Standard eigen-value problem solver.
sddk::memory_t host_memory_t_
Type of host memory (pagable or page-locked) for the arrays that participate in host-to-device memory...
auto const & gvec_fft() const
Return const reference to Gvec_fft object.
std::unique_ptr< spfft::Transform > spfft_transform_coarse_
Coarse-grained FFT for application of local potential and density summation.
r3::matrix< double > lattice_vectors0_
Initial lattice vectors.
std::ostream & out() const
Return output stream.
sddk::mdarray< std::complex< double >, 2 > phase_factors_t_
Phase factors for atom types.
double ewald_lambda() const
Find the lambda parameter used in the Ewald summation.
std::unique_ptr< la::Eigensolver > gen_evp_solver_
Generalized eigen-value problem solver.
mpi::Communicator const & comm() const
Total communicator of the simulation.
fft::Grid fft_grid_
Grid descriptor for the fine-grained FFT transform.
bool initialized_
True if the context is already initialized.
std::unique_ptr< mpi::Grid > mpi_grid_
MPI grid for this simulation.
step_function_t theta_
Step function in real-space and reciprocal domains.
auto const & comm_band_ortho_fft_coarse() const
Communicator, which is orthogonal to comm_fft_coarse within a band communicator.
radial_integrals_t ri_
Stores all radial integrals.
fft::Grid fft_coarse_grid_
Grid descriptor for the coarse-grained FFT transform.
sddk::device_t processing_unit_
Type of the processing unit.
std::vector< int > mpi_grid_dims(std::vector< int > mpi_grid_dims__)
Set dimensions of MPI grid for band diagonalization problem.
int num_spins() const
Number of spin components.
bool gamma_point(bool gamma_point__)
Set flag for Gamma-point calculation.
int num_spinors() const
Number of spinor wave-functions labeled by a sinlge band index.
relativity_t valence_relativity_
Type of relativity for valence states.
void core_relativity(std::string name__)
Set core relativity for the LAPW method.
int num_bands() const
Total number of bands.
int num_spinor_comp() const
Number of non-zero spinor components.
int num_mag_dims() const
Number of dimensions in the magnetization vector.
double pw_cutoff() const
Plane-wave cutoff for G-vectors (in 1/[a.u.]).
void valence_relativity(std::string name__)
Set valence relativity for the LAPW method.
bool use_symmetry() const
Get a use_symmetry flag.
double gk_cutoff() const
Cutoff for G+k vectors (in 1/[a.u.]).
relativity_t core_relativity_
Type of relativity for core states.
int num_fv_states() const
Number of first-variational states.
std::string gen_evp_solver_name() const
Get the name of the generalized eigen-value solver to use.
std::string std_evp_solver_name() const
Get the name of the standard eigen-value solver to use.
Helper class to create FFT grids of given sizes and compute indices in space- and frequency domains.
int index_by_coord(int x__, int y__, int z__) const
Linear index inside FFT buffer by grid coordinates.
int size() const
Size of the communicator (number of ranks).
int rank() const
Rank of MPI process inside communicator.
Parallel standard output.
Radial basis function index.
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
Contains definition and partial implementation of sirius::Crystal_symmetry class.
Get the environment variables.
Crystal lattice functions.
Interface to SPLA library.
int num_devices()
Get the number of devices.
auto get_min_grid(double cutoff__, r3::matrix< double > M__)
Get the minimum grid that circumscribes the cutoff sphere.
auto split_z_dimension(int size_z__, mpi::Communicator const &comm_fft__)
Split z-dimenstion of size_z between MPI ranks of the FFT communicator.
ev_solver_t
Type of eigen-value solver.
@ magma_gpu
MAGMA with GPU pointers.
@ cusolver
CUDA eigen-solver.
@ magma
MAGMA with CPU pointers.
int num_ranks_per_node()
Get number of ranks per node.
Namespace of the SIRIUS library.
relativity_t
Type of relativity treatment in the case of LAPW.
auto split(std::string const str__, char delim__)
Split multi-line string into a list of strings.
auto init_step_function(Unit_cell const &uc__, fft::Gvec const &gv__, fft::Gvec_fft const &gvec_fft__, sddk::mdarray< std::complex< double >, 2 > const &phase_factors_t__, fft::spfft_transform_type< double > spfft__)
Unit step function is defined to be 1 in the interstitial and 0 inside muffin-tins.
auto hostname()
Get host name.
Add or substitute OMP functions.
Contains definition and implementation of Simulation_context class.
Get version number and related quantities.
Generate unit step function for LAPW method.
std::function< void(int, int, double *, double *)> ps_rho_ri_
Callback function provided by the host code to compute radial integrals of pseudo charge density.
std::function< void(int, int, double *, double *)> rhoc_ri_djl_
std::function< void(int, double, double *, int)> ps_atomic_wf_ri_
Callback function provided by the host code to compute radial integrals of pseudo atomic wave-functio...
std::function< void(int, double, double *, int)> beta_ri_djl_
std::function< void(int, double, double *, int)> beta_ri_
Callback function provided by the host code to compute radial integrals of beta projectors.
std::function< void(int, double, double *, int, int)> aug_ri_
Callback function provided by the host code to compute radial integrals of augmentation operator.
std::function< void(int, double, double *, int)> ps_atomic_wf_ri_djl_
std::function< void(int, int, double *, double *)> vloc_ri_djl_
Callback function to compute radial integrals of local potential with derivatives of spherical Bessel...
std::function< void(int, int, double *, double *)> rhoc_ri_
Callback function provided by the host code to compute radial integrals of pseudo core charge density...
std::function< void(int, int, double *, double *)> vloc_ri_
Callback function to compute radial integrals of local potential.
std::function< void(int, double, double *, int, int)> aug_ri_djl_
std::unique_ptr< Radial_integrals_beta< true > > beta_djl_
Radial integrals of beta-projectors with derivatives of spherical Bessel functions.
std::unique_ptr< Radial_integrals_rho_pseudo > ps_rho_
Radial integrals of total pseudo-charge density.
std::unique_ptr< Radial_integrals_rho_core_pseudo< true > > ps_core_djl_
Radial integrals of pseudo-core charge density with derivatives of spherical Bessel functions.
std::unique_ptr< Radial_integrals_rho_core_pseudo< false > > ps_core_
Radial integrals of pseudo-core charge density.
std::unique_ptr< Radial_integrals_aug< true > > aug_djl_
Radial integrals of augmentation operator with derivatives of spherical Bessel functions.
std::unique_ptr< Radial_integrals_vloc< true > > vloc_djl_
Radial integrals of the local part of pseudopotential with derivatives of spherical Bessel functions.
std::unique_ptr< Radial_integrals_aug< false > > aug_
Radial integrals of augmentation operator.
std::unique_ptr< Radial_integrals_atomic_wf< true > > ps_atomic_wf_djl_
Radial integrals of atomic wave-functions with derivatives of spherical Bessel functions.
std::unique_ptr< Radial_integrals_atomic_wf< false > > ps_atomic_wf_
Radial integrals of atomic wave-functions.
std::unique_ptr< Radial_integrals_beta< false > > beta_
Radial integrals of beta-projectors.
std::unique_ptr< Radial_integrals_vloc< false > > vloc_
Radial integrals of the local part of pseudopotential.
Contains implementation of sirius::XC_functional class.