SIRIUS 7.5.0
Electronic structure library and applications
simulation_context.cpp
Go to the documentation of this file.
1// Copyright (c) 2013-2019 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file simulation_context.cpp
21 *
22 * \brief Implementation of Simulation_context class.
23 */
24
25#include <gsl/gsl_sf_bessel.h>
26#include "core/profiler.hpp"
27#include "core/env/env.hpp"
28#include "core/omp.hpp"
31#include "symmetry/lattice.hpp"
37
38namespace sirius {
39
40template <>
41spfft::Transform& Simulation_context::spfft<double>()
42{
43 return *spfft_transform_;
44}
45
46template <>
47spfft::Transform const& Simulation_context::spfft<double>() const
48{
49 return *spfft_transform_;
50}
51
52template <>
53spfft::Transform& Simulation_context::spfft_coarse<double>()
54{
56}
57
58template <>
59spfft::Transform const& Simulation_context::spfft_coarse<double>() const
60{
62}
63
64template <>
65spfft::Grid& Simulation_context::spfft_grid_coarse<double>()
66{
67 return *spfft_grid_coarse_;
68}
69
70#if defined(SIRIUS_USE_FP32)
71template <>
72spfft::TransformFloat& Simulation_context::spfft<float>()
73{
74 return *spfft_transform_float_;
75}
76
77template <>
78spfft::TransformFloat const& Simulation_context::spfft<float>() const
79{
80 return *spfft_transform_float_;
81}
82
83template <>
84spfft::TransformFloat& Simulation_context::spfft_coarse<float>()
85{
86 return *spfft_transform_coarse_float_;
87}
88
89template <>
90spfft::TransformFloat const& Simulation_context::spfft_coarse<float>() const
91{
92 return *spfft_transform_coarse_float_;
93}
94
95template <>
96spfft::GridFloat& Simulation_context::spfft_grid_coarse<float>()
97{
98 return *spfft_grid_coarse_float_;
99}
100#endif
101
102void
104{
105 if (!(cfg().control().fft_mode() == "serial" || cfg().control().fft_mode() == "parallel")) {
106 RTE_THROW("wrong FFT mode");
107 }
108
109 auto rlv = unit_cell().reciprocal_lattice_vectors();
110
111 /* create FFT driver for dense mesh (density and potential) */
112 auto fft_grid = cfg().settings().fft_grid_size();
113 if (fft_grid[0] * fft_grid[1] * fft_grid[2] == 0) {
115 cfg().settings().fft_grid_size(fft_grid_);
116 } else {
117 /* else create a grid with user-specified dimensions */
118 fft_grid_ = fft::Grid(fft_grid);
119 }
120
121 /* create FFT grid for coarse mesh */
123}
124
125
126double
128{
129 /* alpha = 1 / (2*sigma^2), selecting alpha here for better convergence */
130 double lambda{1};
131 double gmax = pw_cutoff();
132 double upper_bound{0};
133 double charge = unit_cell().num_electrons();
134
135 /* iterate to find lambda */
136 do {
137 lambda += 0.1;
138 upper_bound =
139 charge * charge * std::sqrt(2.0 * lambda / twopi) * std::erfc(gmax * std::sqrt(1.0 / (4.0 * lambda)));
140 } while (upper_bound < 1e-8);
141
142 if (lambda < 1.5 && comm().rank() == 0) {
143 std::stringstream s;
144 s << "ewald_lambda(): pw_cutoff is too small";
145 RTE_WARNING(s);
146 }
147 return lambda;
148}
149
150void
152{
153 PROFILE("sirius::Simulation_context::initialize");
154
155 /* can't initialize twice */
156 if (initialized_) {
157 RTE_THROW("Simulation parameters are already initialized.");
158 }
159
160 auto verb_lvl = env::get_value_ptr<int>("SIRIUS_VERBOSITY");
161 if (verb_lvl) {
162 this->verbosity(*verb_lvl);
163 }
164
165 /* setup the output stream */
166 if (this->comm().rank() == 0 && this->verbosity() >= 1) {
167 auto out_str = split(cfg().control().output(), ':');
168 if (out_str.size() != 2) {
169 RTE_THROW("wrong output stream parameter");
170 }
171 if (out_str[0] == "stdout") {
172 output_stream_ = &std::cout;
173 } else if (out_str[0] == "file") {
174 output_file_stream_ = std::ofstream(out_str[1]);
175 output_stream_ = &output_file_stream_;
176 } else {
177 RTE_THROW("unknown output stream type");
178 }
179 } else {
180 output_stream_ = &null_stream();
181 }
182
183 electronic_structure_method(cfg().parameters().electronic_structure_method());
184 core_relativity(cfg().parameters().core_relativity());
185 valence_relativity(cfg().parameters().valence_relativity());
186
187 /* can't run fp-lapw with Gamma point trick */
188 if (full_potential()) {
189 gamma_point(false);
190 }
191
192 /* Gamma-point calculation and non-collinear magnetism are not compatible */
193 if (num_mag_dims() == 3) {
194 gamma_point(false);
195 }
196
197 /* set processing unit type */
198 processing_unit(cfg().control().processing_unit());
199
200 /* check if we can use a GPU device */
201 if (processing_unit() == sddk::device_t::GPU) {
202#if !defined(SIRIUS_GPU)
203 RTE_THROW("not compiled with GPU support!");
204#endif
205 }
206
207 /* initialize MPI communicators */
208 init_comm();
209
210 switch (processing_unit()) {
211 case sddk::device_t::CPU: {
212 host_memory_t_ = sddk::memory_t::host;
213 break;
214 }
215 case sddk::device_t::GPU: {
216 host_memory_t_ = sddk::memory_t::host_pinned;
217 break;
218 }
219 }
220
221 if (processing_unit() == sddk::device_t::GPU) {
222 spla_ctx_.reset(new spla::Context{SPLA_PU_GPU});
223 spla_ctx_->set_tile_size_gpu(1688); // limit GPU memory usage to around 500MB
224 }
225 // share context for blas operations to reduce memory consumption
226 splablas::get_handle_ptr() = spla_ctx_;
227
228 /* can't use reduced G-vectors in LAPW code */
229 if (full_potential()) {
230 cfg().control().reduce_gvec(false);
231 }
232
233 if (!cfg().iterative_solver().type().size() || (cfg().iterative_solver().type() == "auto")) {
234 if (full_potential()) {
235 cfg().iterative_solver().type("exact");
236 } else {
237 cfg().iterative_solver().type("davidson");
238 }
239 }
240 /* set default values for the G-vector cutoff */
241 if (pw_cutoff() <= 0) {
242 pw_cutoff(full_potential() ? 12 : 20);
243 }
244
245 /* initialize variables related to the unit cell */
246 unit_cell().initialize();
247 /* save the volume of the initial unit cell */
248 omega0_ = unit_cell().omega();
249 /* save initial lattice vectors */
250 lattice_vectors0_ = unit_cell().lattice_vectors();
251
252 /* check the lattice symmetries */
253 if (use_symmetry()) {
254 auto lv = r3::matrix<double>(unit_cell().lattice_vectors());
255
256 auto lat_sym = find_lat_sym(lv, cfg().control().spglib_tolerance());
257
258 #pragma omp parallel for
259 for (int i = 0; i < unit_cell().symmetry().size(); i++) {
260 auto& spgR = unit_cell().symmetry()[i].spg_op.R;
261 bool found{false};
262 for (size_t j = 0; j < lat_sym.size(); j++) {
263 auto latR = lat_sym[j];
264 found = true;
265 for (int x : {0, 1, 2}) {
266 for (int y : {0, 1, 2}) {
267 found = found && (spgR(x, y) == latR(x, y));
268 }
269 }
270 if (found) {
271 break;
272 }
273 }
274 if (!found) {
275 RTE_THROW("spglib lattice symmetry was not found in the list of SIRIUS generated symmetries");
276 }
277 }
278 }
279
280 /* force global spin-orbit correction flag if one of the species has spin-orbit */
281 for (int iat = 0; iat < unit_cell().num_atom_types(); iat++) {
282 if (unit_cell().atom_type(iat).spin_orbit_coupling()) {
283 this->so_correction(true);
284 }
285 }
286
287 /* set default values for the G+k-vector cutoff */
288 if (full_potential()) {
289 /* find the cutoff for G+k vectors (derived from rgkmax (aw_cutoff here) and minimum MT radius) */
290 if (aw_cutoff() > 0) {
291 gk_cutoff(aw_cutoff() / unit_cell().min_mt_radius());
292 }
293 if (gk_cutoff() <= 0) {
294 gk_cutoff(3);
295 }
296 } else {
297 /* in pseudopotential case */
298 if (gk_cutoff() <= 0) {
299 gk_cutoff(7);
300 }
301 }
302
303 /* check the G+k cutoff */
304 if (gk_cutoff() * 2 > pw_cutoff()) {
305 std::stringstream s;
306 s << "G+k cutoff is too large for a given plane-wave cutoff" << std::endl
307 << " pw cutoff : " << pw_cutoff() << std::endl
308 << " doubled G+k cutoff : " << gk_cutoff() * 2;
309 RTE_THROW(s);
310 }
311
312 if (full_potential() && (this->gk_cutoff() * this->unit_cell().max_mt_radius() > this->unit_cell().lmax_apw()) &&
313 this->comm().rank() == 0 && this->verbosity() >= 0) {
314 std::stringstream s;
315 s << "G+k cutoff (" << this->gk_cutoff() << ") is too large for a given lmax ("
316 << this->unit_cell().lmax_apw() << ") and a maximum MT radius (" << this->unit_cell().max_mt_radius() << ")"
317 << std::endl
318 << "suggested minimum value for lmax : " << int(this->gk_cutoff() * this->unit_cell().max_mt_radius()) + 1;
319 RTE_WARNING(s);
320 }
321
322 if (!full_potential()) {
323 lmax_rho(-1);
324 lmax_pot(-1);
325 lmax_apw(-1);
326 }
327
328 /* initialize FFT grid dimensions */
330
331 int nbnd = static_cast<int>(unit_cell().num_valence_electrons() / 2.0) +
332 std::max(10, static_cast<int>(0.1 * unit_cell().num_valence_electrons()));
333 if (full_potential()) {
334 /* take 10% of empty non-magnetic states */
335 if (num_fv_states() < 0) {
336 num_fv_states(nbnd);
337 }
338 if (num_fv_states() < static_cast<int>(unit_cell().num_valence_electrons() / 2.0)) {
339 std::stringstream s;
340 s << "not enough first-variational states : " << num_fv_states();
341 RTE_THROW(s);
342 }
343 } else {
344 if (num_mag_dims() == 3) {
345 nbnd *= 2;
346 }
347 /* if number of bands was not set by the host code, set it here */
348 if (num_bands() < 0) {
349 num_bands(nbnd);
350 }
351 }
352
353 std::string evsn[] = {std_evp_solver_name(), gen_evp_solver_name()};
354#if defined(SIRIUS_CUDA)
355 bool is_cuda{true};
356#else
357 bool is_cuda{false};
358#endif
359#if defined(SIRIUS_MAGMA)
360 bool is_magma{true};
361#else
362 bool is_magma{false};
363#endif
364#if defined(SIRIUS_SCALAPACK)
365 bool is_scalapack{true};
366#else
367 bool is_scalapack{false};
368#endif
369#if defined(SIRIUS_ELPA)
370 bool is_elpa{true};
371#else
372 bool is_elpa{false};
373#endif
374#if defined(SIRIUS_DLAF)
375 bool is_dlaf{true};
376#else
377 bool is_dlaf{false};
378#endif
379
380 if (processing_unit() == sddk::device_t::CPU || acc::num_devices() == 0) {
381 is_cuda = false;
382 is_magma = false;
383 }
384
385 int npr = cfg().control().mpi_grid_dims()[0];
386 int npc = cfg().control().mpi_grid_dims()[1];
387
388 /* deduce the default eigen-value solver */
389 for (int i : {0, 1}) {
390 if (evsn[i] == "auto") {
391 /* conditions for sequential diagonalization */
392 if (comm_band().size() == 1 || npc == 1 || npr == 1 || !is_scalapack) {
393 if (full_potential()) {
394 if (is_magma) {
395 evsn[i] = "magma";
396 } else if (is_cuda) {
397 evsn[i] = "cusolver";
398 } else {
399 evsn[i] = "lapack";
400 }
401 } else {
402 if (is_cuda) {
403 evsn[i] = "cusolver";
404 } else if (is_magma && num_bands() > 200) {
405 evsn[i] = "magma";
406 } else {
407 evsn[i] = "lapack";
408 }
409 }
410 } else {
411 if (is_scalapack) {
412 evsn[i] = "scalapack";
413 }
414 if (is_elpa) {
415 evsn[i] = "elpa1";
416 }
417 if (is_dlaf) {
418 evsn[i] = "dlaf";
419 }
420 }
421 }
422 }
423
424 /* environment variable has a highest priority */
425 auto ev_str = env::get_ev_solver();
426 if (ev_str.size()) {
427 evsn[0] = ev_str;
428 evsn[1] = ev_str;
429 }
430
431 std_evp_solver_name(evsn[0]);
432 gen_evp_solver_name(evsn[1]);
433
434 std_evp_solver_ = la::Eigensolver_factory(std_evp_solver_name());
435 gen_evp_solver_ = la::Eigensolver_factory(gen_evp_solver_name());
436
437 auto& std_solver = std_evp_solver();
438 auto& gen_solver = gen_evp_solver();
439
440 if (std_solver.is_parallel() != gen_solver.is_parallel()) {
441 RTE_THROW("both solvers must be sequential or parallel");
442 }
443
444 /* setup BLACS grid */
445 if (std_solver.is_parallel()) {
446 blacs_grid_ = std::make_unique<la::BLACS_grid>(comm_band(), npr, npc);
447 } else {
448 blacs_grid_ = std::make_unique<la::BLACS_grid>(mpi::Communicator::self(), 1, 1);
449 }
450
451 /* setup the cyclic block size */
452 if (cyclic_block_size() < 0) {
453 double a = std::min(std::log2(double(num_bands()) / blacs_grid_->num_ranks_col()),
454 std::log2(double(num_bands()) / blacs_grid_->num_ranks_row()));
455 if (a < 1) {
456 cfg().control().cyclic_block_size(2);
457 } else {
458 cfg().control().cyclic_block_size(
459 static_cast<int>(std::min(128.0, std::pow(2.0, static_cast<int>(a))) + 1e-12));
460 }
461 }
462
463 /* placeholder for augmentation operator for each atom type */
464 augmentation_op_.resize(unit_cell().num_atom_types());
465
466 if (this->hubbard_correction()) {
467 /* if spin orbit coupling or non collinear magnetisms are activated, then
468 we consider the full spherical hubbard correction */
469 if (this->so_correction() || this->num_mag_dims() == 3) {
470 this->cfg().hubbard().simplified(false);
471 }
472 }
473
474 if (cfg().parameters().precision_wf() == "fp32" && cfg().parameters().precision_gs() == "fp64") {
475 double t = std::numeric_limits<float>::epsilon() * 10;
476 auto tol = std::max(cfg().settings().itsol_tol_min(), t);
477 cfg().settings().itsol_tol_min(tol);
478 }
479
480
481 /* set the smearing */
482 smearing(cfg().parameters().smearing());
483
484 /* create G-vectors on the first call to update() */
485 update();
486
487 ::sirius::print_memory_usage(this->out(), FILE_LINE);
488
489 if (verbosity() >= 1 && comm().rank() == 0) {
490 print_info(this->out());
491 }
492
493 auto print_mpi_layout = env::print_mpi_layout();
494
495 if (verbosity() >= 3 || print_mpi_layout) {
496 mpi::pstdout pout(comm());
497 if (comm().rank() == 0) {
498 pout << "MPI rank placement" << std::endl;
499 pout << hbar(136, '-') << std::endl;
500 pout << " | comm tot, band, k | comm fft, ortho | mpi_grid tot, row, col | blacs tot, row, col" << std::endl;
501 }
502 pout << std::setw(12) << hostname() << " | "
503 << std::setw(6) << comm().rank()
504 << std::setw(6) << comm_band().rank()
505 << std::setw(6) << comm_k().rank() << " | "
506 << std::setw(6) << comm_fft_coarse().rank()
507 << std::setw(6) << comm_band_ortho_fft_coarse().rank() << " | "
508 << std::setw(6) << mpi_grid_->communicator(3).rank()
509 << std::setw(6) << mpi_grid_->communicator(1 << 0).rank()
510 << std::setw(6) << mpi_grid_->communicator(1 << 1).rank() << " | "
511 << std::setw(6) << blacs_grid().comm().rank()
512 << std::setw(6) << blacs_grid().comm_row().rank()
513 << std::setw(6) << blacs_grid().comm_col().rank() << std::endl;
514 rte::ostream(this->out(), "info") << pout.flush(0);
515 }
516
517 initialized_ = true;
518 cfg().lock();
519}
520
521void
522Simulation_context::print_info(std::ostream& out__) const
523{
524 {
525 rte::ostream os(out__, "info");
526 tm const* ptm = localtime(&start_time_.tv_sec);
527 char buf[100];
528 strftime(buf, sizeof(buf), "%a, %e %b %Y %H:%M:%S", ptm);
529
530 os << "SIRIUS version : " << sirius::major_version() << "." << sirius::minor_version()
531 << "." << sirius::revision() << std::endl
532 << "git hash : " << sirius::git_hash() << std::endl
533 << "git branch : " << sirius::git_branchname() << std::endl
534 << "build time : " << sirius::build_date() << std::endl
535 << "start time : " << std::string(buf) << std::endl
536 << std::endl
537 << "number of MPI ranks : " << this->comm().size() << std::endl;
538 if (mpi_grid_) {
539 os << "MPI grid :";
540 for (int i : {0, 1}) {
541 os << " " << mpi_grid_->communicator(1 << i).size();
542 }
543 os << std::endl;
544 }
545 os << "maximum number of OMP threads : " << omp_get_max_threads() << std::endl
546 << "number of MPI ranks per node : " << mpi::num_ranks_per_node() << std::endl
547 << "page size (Kb) : " << (get_page_size() >> 10) << std::endl
548 << "number of pages : " << get_num_pages() << std::endl
549 << "available memory (GB) : " << (get_total_memory() >> 30) << std::endl;
550 os << std::endl;
551 }
552 {
553 rte::ostream os(out__, "fft");
554 std::string headers[] = {"FFT context for density and potential", "FFT context for coarse grid"};
555 double cutoffs[] = {pw_cutoff(), 2 * gk_cutoff()};
556 mpi::Communicator const* comms[] = {&comm_fft(), &comm_fft_coarse()};
557 fft::Grid fft_grids[] = {this->fft_grid_, this->fft_coarse_grid_};
558 fft::Gvec const* gvecs[] = {&gvec(), &gvec_coarse()};
559
560 for (int i = 0; i < 2; i++) {
561 os << headers[i] << std::endl
562 << hbar(37, '=') << std::endl
563 << " comm size : " << comms[i]->size() << std::endl
564 << " plane wave cutoff : " << cutoffs[i] << std::endl
565 << " grid size : " << fft_grids[i][0] << " "
566 << fft_grids[i][1] << " "
567 << fft_grids[i][2] << " total : "
568 << fft_grids[i].num_points() << std::endl
569 << " grid limits : " << fft_grids[i].limits(0).first << " "
570 << fft_grids[i].limits(0).second << " "
571 << fft_grids[i].limits(1).first << " "
572 << fft_grids[i].limits(1).second << " "
573 << fft_grids[i].limits(2).first << " "
574 << fft_grids[i].limits(2).second << std::endl
575 << " number of G-vectors within the cutoff : " << gvecs[i]->num_gvec() << std::endl
576 << " local number of G-vectors : " << gvecs[i]->count() << std::endl
577 << " number of G-shells : " << gvecs[i]->num_shells() << std::endl
578 << std::endl;
579 }
580 os << std::endl;
581 }
582 {
583 rte::ostream os(out__, "unit cell");
584 unit_cell().print_info(os, verbosity());
585 }
586 {
587 rte::ostream os(out__, "sym");
588 unit_cell().symmetry().print_info(os, verbosity());
589 }
590 {
591 rte::ostream os(out__, "atom type");
592 for (int i = 0; i < unit_cell().num_atom_types(); i++) {
593 unit_cell().atom_type(i).print_info(os);
594 }
595 }
596 if (this->cfg().control().print_neighbors()) {
597 rte::ostream os(out__, "nghbr");
598 unit_cell().print_nearest_neighbours(os);
599 }
600
601 {
602 rte::ostream os(out__, "info");
603 os << "total nuclear charge : " << unit_cell().total_nuclear_charge() << std::endl
604 << "number of core electrons : " << unit_cell().num_core_electrons() << std::endl
605 << "number of valence electrons : " << unit_cell().num_valence_electrons() << std::endl
606 << "total number of electrons : " << unit_cell().num_electrons() << std::endl
607 << "extra charge : " << cfg().parameters().extra_charge() << std::endl
608 << "total number of aw basis functions : " << unit_cell().mt_aw_basis_size() << std::endl
609 << "total number of lo basis functions : " << unit_cell().mt_lo_basis_size() << std::endl
610 << "number of first-variational states : " << num_fv_states() << std::endl
611 << "number of bands : " << num_bands() << std::endl
612 << "number of spins : " << num_spins() << std::endl
613 << "number of magnetic dimensions : " << num_mag_dims() << std::endl
614 << "number of spinor components : " << num_spinor_comp() << std::endl
615 << "number of spinors per band index : " << num_spinors() << std::endl
616 << "lmax_apw : " << unit_cell().lmax_apw() << std::endl
617 << "lmax_rho : " << lmax_rho() << std::endl
618 << "lmax_pot : " << lmax_pot() << std::endl
619 << "lmax_rf : " << unit_cell().lmax() << std::endl
620 << "smearing type : " << cfg().parameters().smearing().c_str() << std::endl
621 << "smearing width : " << smearing_width() << std::endl
622 << "cyclic block size : " << cyclic_block_size() << std::endl
623 << "|G+k| cutoff : " << gk_cutoff() << std::endl
624 << "symmetry : " << std::boolalpha << use_symmetry() << std::endl
625 << "so_correction : " << std::boolalpha << so_correction() << std::endl;
626
627 std::string reln[] = {"valence relativity : ", "core relativity : "};
629 std::map<relativity_t, std::string> const relm = {
630 {relativity_t::none, "none"},
631 {relativity_t::koelling_harmon, "Koelling-Harmon"},
632 {relativity_t::zora, "zora"},
633 {relativity_t::iora, "iora"},
634 {relativity_t::dirac, "Dirac"}
635 };
636 for (int i = 0; i < 2; i++) {
637 os << reln[i] << relm.at(relt[i]) << std::endl;
638 }
639
640 std::string evsn[] = {"standard eigen-value solver : ", "generalized eigen-value solver : "};
641 la::ev_solver_t evst[] = {std_evp_solver().type(), gen_evp_solver().type()};
642 std::map<la::ev_solver_t, std::string> const evsm = {
643 {la::ev_solver_t::lapack, "LAPACK"},
644 {la::ev_solver_t::scalapack, "ScaLAPACK"},
645 {la::ev_solver_t::elpa, "ELPA"},
646 {la::ev_solver_t::dlaf, "DLA-Future"},
647 {la::ev_solver_t::magma, "MAGMA"},
648 {la::ev_solver_t::magma_gpu, "MAGMA with GPU pointers"},
649 {la::ev_solver_t::cusolver, "cuSOLVER"}
650 };
651 for (int i = 0; i < 2; i++) {
652 os << evsn[i] << evsm.at(evst[i]) << std::endl;
653 }
654 os << "processing unit : ";
655 switch (processing_unit()) {
656 case sddk::device_t::CPU: {
657 os << "CPU" << std::endl;
658 break;
659 }
660 case sddk::device_t::GPU: {
661 os << "GPU" << std::endl;
662 os << "number of devices : " << acc::num_devices() << std::endl;
663 acc::print_device_info(0, os);
664 break;
665 }
666 }
667 os << std::endl
668 << "iterative solver : " << cfg().iterative_solver().type() << std::endl
669 << "number of steps : " << cfg().iterative_solver().num_steps() << std::endl
670 << "subspace size : " << cfg().iterative_solver().subspace_size() << std::endl
671 << "early restart ratio : " << cfg().iterative_solver().early_restart() << std::endl
672 << "precision_wf : " << cfg().parameters().precision_wf() << std::endl
673 << "precision_hs : " << cfg().parameters().precision_hs() << std::endl
674 << "mixer : " << cfg().mixer().type() << std::endl
675 << "mixing beta : " << cfg().mixer().beta() << std::endl
676 << "max_history : " << cfg().mixer().max_history() << std::endl
677 << "use_hartree : " << std::boolalpha << cfg().mixer().use_hartree() << std::endl
678 << std::endl
679 << "spglib version: " << spg_get_major_version() << "." << spg_get_minor_version() << "."
680 << spg_get_micro_version() << std::endl;
681 }
682 {
683 rte::ostream os(out__, "info");
684 unsigned int vmajor, vminor, vmicro;
685 H5get_libversion(&vmajor, &vminor, &vmicro);
686 os << "HDF5 version: " << vmajor << "." << vminor << "." << vmicro << std::endl;
687 }
688 {
689 rte::ostream os(out__, "info");
690 int vmajor, vminor, vmicro;
691 xc_version(&vmajor, &vminor, &vmicro);
692 os << "Libxc version: " << vmajor << "." << vminor << "." << vmicro << std::endl;
693 }
694 {
695 rte::ostream os(out__, "info");
696 int i{1};
697 os << std::endl << "XC functionals" << std::endl
698 << hbar(14, '=') << std::endl;
699 for (auto& xc_label : xc_functionals()) {
700 XC_functional xc(spfft<double>(), unit_cell().lattice_vectors(), xc_label, num_spins());
701#if defined(SIRIUS_USE_VDWXC)
702 if (xc.is_vdw()) {
703 os << "Van der Walls functional" << std::endl
704 << xc.refs() << std::endl;
705 continue;
706 }
707#endif
708 os << i << ") " << xc_label << " : " << xc.name() << std::endl
709 << xc.refs() << std::endl;
710 i++;
711 }
712 }
713
714 if (!full_potential()) {
715 rte::ostream os(out__, "info");
716 os << std::endl
717 << "memory consumption" << std::endl
718 << hbar(18, '=') << std::endl;
719 /* volume of the Brillouin zone */
720 double v0 = std::pow(twopi, 3) / unit_cell().omega();
721 /* volume of the cutoff sphere for wave-functions */
722 double v1 = fourpi * std::pow(gk_cutoff(), 3) / 3;
723 /* volume of the cutoff sphere for density and potential */
724 double v2 = fourpi * std::pow(pw_cutoff(), 3) / 3;
725 /* volume of the cutoff sphere for coarse FFT grid */
726 double v3 = fourpi * std::pow(2 * gk_cutoff(), 3) / 3;
727 /* approximate number of G+k vectors */
728 auto ngk = static_cast<size_t>(v1 / v0);
729 if (gamma_point()) {
730 ngk /= 2;
731 }
732 /* approximate number of G vectors */
733 auto ng = static_cast<size_t>(v2 / v0);
734 if (cfg().control().reduce_gvec()) {
735 ng /= 2;
736 }
737 /* approximate number of coarse G vectors */
738 auto ngc = static_cast<size_t>(v3 / v0);
739 if (cfg().control().reduce_gvec()) {
740 ngc /= 2;
741 }
742 os << "approximate number of G+k vectors : " << ngk << std::endl
743 << "approximate number of G vectors : " << ng << std::endl
744 << "approximate number of coarse G vectors : " << ngc << std::endl;
745 size_t wf_size = ngk * num_bands() * num_spins() * 16;
746 os << "approximate size of wave-functions for each k-point: " << static_cast<int>(wf_size >> 20) << " Mb, "
747 << static_cast<int>((wf_size / comm_band().size()) >> 20) << " Mb/rank" << std::endl;
748
749 /* number of simultaneously treated spin components */
750 int num_sc = (num_mag_dims() == 3) ? 2 : 1;
751 /* number of auxiliary basis functions */
752 int num_phi = cfg().iterative_solver().subspace_size() * num_bands();
753 /* memory consumption for Davidson:
754 - wave-functions psi (num_bands x num_spin)
755 - Hpsi and Spsi (num_bands * num_sc)
756 - auxiliary basis phi (num_bands x num_sc) and also Hphi and Sphi of the same size
757 - residuals (num_bands * num_sc)
758 - beta-projectors (estimated as num_bands)
759
760 Each wave-function is of size ngk
761
762 TODO: add estimation of subspace matrix size (H_{ij} and S_{ij})
763 */
764 size_t tot_size = (num_bands() * num_spins() + 2 * num_bands() * num_sc + 3 * num_phi * num_sc +
765 num_bands() * num_sc + num_bands()) *
766 ngk * sizeof(std::complex<double>);
767 os << "approximate memory consumption of Davidson solver: "
768 << static_cast<int>((tot_size / comm_band().size()) >> 20) << " Mb/rank" << std::endl;
769
770 if (unit_cell().augment()) {
771 /* approximate size of local fraction of G vectors */
772 size_t ngloc = std::max(static_cast<size_t>(1), ng / comm().size());
773 /* upper limit of packed {xi,xi'} bete-projectors index */
774 int nb = unit_cell().max_mt_basis_size() * (unit_cell().max_mt_basis_size() + 1) / 2;
775 /* size of augmentation operator;
776 factor 2 is needed for the estimation of GPU memory, as augmentation operator for two atom types
777 will be stored on GPU and computation will be overlapped with transfer of the next augmentation
778 operator */
779 // TODO: optimize generated_rho_aug() for less memory consumption
780 size_t size_aug = nb * ngloc * sizeof(std::complex<double>);
781 if (unit_cell().num_atom_types() > 1) {
782 size_aug *= 2;
783 }
784
785 /* and two more arrays will be allocated in generate_rho_aug() with 1Gb maximum size each */
786 size_t size1 = nb * ngloc * sizeof(std::complex<double>);
787 size1 = std::min(size1, static_cast<size_t>(1 << 30));
788
789 int max_atoms{0};
790 for (int iat = 0; iat < unit_cell().num_atom_types(); iat++) {
791 max_atoms = std::max(max_atoms, unit_cell().atom_type(iat).num_atoms());
792 }
793 size_t size2 = max_atoms * ngloc * sizeof(std::complex<double>);
794 size2 = std::min(size2, static_cast<size_t>(1 << 30));
795
796 size_aug += (size1 + size2);
797 os << "approximate memory consumption of charge density augmentation: "
798 << static_cast<int>(size_aug >> 20) << " Mb/rank" << std::endl;
799 }
800 /* FFT buffers of fine and coarse meshes */
801 size_t size_fft = spfft<double>().local_slice_size() + spfft_coarse<double>().local_slice_size();
802 size_fft *= sizeof(double);
803 if (!gamma_point()) {
804 size_fft *= 2;
805 }
806 os << "approximate memory consumption of FFT transforms: "
807 << static_cast<int>(size_fft >> 20) << " Mb/rank" << std::endl;
808 }
809}
810
811/** The update of the lattice vectors or atomic positions has an impact on many quantities which have to be
812 recomputed in the correct order. First, the unit cell is updated and the new reciprocal lattice vectors
813 are obtained. Then the G-vectors are computed (if this is the first call to update()) or recomputed with a
814 new reciprocal lattice, but without rebuilding a new list. On the first call the spfft objects are created
815 after G-vectors. */
816void
818{
819 PROFILE("sirius::Simulation_context::update");
820
821 /* update unit cell (reciprocal lattice, etc.) */
822 unit_cell().update();
823
824 /* get new reciprocal vector */
825 auto rlv = unit_cell().reciprocal_lattice_vectors();
826
827 auto spfft_pu = this->processing_unit() == sddk::device_t::CPU ? SPFFT_PU_HOST : SPFFT_PU_GPU;
828
829 /* create a list of G-vectors for corase FFT grid; this is done only once,
830 the next time only reciprocal lattice of the G-vectors is updated */
831 if (!gvec_coarse_) {
832 /* create list of coarse G-vectors */
833 gvec_coarse_ = std::make_unique<fft::Gvec>(rlv, 2 * gk_cutoff(), comm(), cfg().control().reduce_gvec(),
834 cfg().control().spglib_tolerance());
835 /* create FFT friendly partiton */
836 gvec_coarse_fft_ = std::make_shared<fft::Gvec_fft>(*gvec_coarse_, comm_fft_coarse(),
837 comm_ortho_fft_coarse());
838
840
841 /* create spfft buffer for coarse transform */
842 spfft_grid_coarse_ = std::make_unique<spfft::Grid>(fft_coarse_grid_[0], fft_coarse_grid_[1],
843 fft_coarse_grid_[2], gvec_coarse_fft_->zcol_count(),
844 spl_z.local_size(), spfft_pu, -1, comm_fft_coarse().native(), SPFFT_EXCH_DEFAULT);
845#ifdef SIRIUS_USE_FP32
846 spfft_grid_coarse_float_ = std::make_unique<spfft::GridFloat>(fft_coarse_grid_[0], fft_coarse_grid_[1],
847 fft_coarse_grid_[2], gvec_coarse_fft_->zcol_count(), spl_z.local_size(), spfft_pu, -1,
848 comm_fft_coarse().native(), SPFFT_EXCH_DEFAULT);
849#endif
850 /* create spfft transformations */
851 const auto fft_type_coarse = gvec_coarse().reduced() ? SPFFT_TRANS_R2C : SPFFT_TRANS_C2C;
852
853 auto const& gv = gvec_coarse_fft_->gvec_array();
854
855 /* create actual transform object */
856 spfft_transform_coarse_.reset(new spfft::Transform(spfft_grid_coarse_->create_transform(
857 spfft_pu, fft_type_coarse, fft_coarse_grid_[0], fft_coarse_grid_[1], fft_coarse_grid_[2],
858 spl_z.local_size(), gvec_coarse_fft_->count(), SPFFT_INDEX_TRIPLETS,
859 gv.at(sddk::memory_t::host))));
860#ifdef SIRIUS_USE_FP32
861 spfft_transform_coarse_float_.reset(new spfft::TransformFloat(spfft_grid_coarse_float_->create_transform(
862 spfft_pu, fft_type_coarse, fft_coarse_grid_[0], fft_coarse_grid_[1], fft_coarse_grid_[2],
863 spl_z.local_size(), gvec_coarse_fft_->count(), SPFFT_INDEX_TRIPLETS,
864 gv.at(sddk::memory_t::host))));
865#endif
866 } else {
867 gvec_coarse_->lattice_vectors(rlv);
868 }
869
870 /* create a list of G-vectors for dense FFT grid; G-vectors are divided between all available MPI ranks.*/
871 if (!gvec_) {
872 gvec_ = std::make_shared<fft::Gvec>(pw_cutoff(), *gvec_coarse_);
873 gvec_fft_ = std::make_shared<fft::Gvec_fft>(*gvec_, comm_fft(), comm_ortho_fft());
874
875 auto spl_z = fft::split_z_dimension(fft_grid_[2], comm_fft());
876
877 /* create spfft buffer for fine-grained transform */
878 spfft_grid_ = std::unique_ptr<spfft::Grid>(
879 new spfft::Grid(fft_grid_[0], fft_grid_[1], fft_grid_[2],
880 gvec_fft_->zcol_count(), spl_z.local_size(), spfft_pu, -1,
881 comm_fft().native(), SPFFT_EXCH_DEFAULT));
882#if defined(SIRIUS_USE_FP32)
883 spfft_grid_float_ = std::unique_ptr<spfft::GridFloat>(
884 new spfft::GridFloat(fft_grid_[0], fft_grid_[1], fft_grid_[2], gvec_fft_->zcol_count(),
885 spl_z.local_size(), spfft_pu, -1, comm_fft().native(), SPFFT_EXCH_DEFAULT));
886#endif
887 const auto fft_type = gvec().reduced() ? SPFFT_TRANS_R2C : SPFFT_TRANS_C2C;
888
889 auto const& gv = gvec_fft_->gvec_array();
890
891 spfft_transform_.reset(new spfft::Transform(spfft_grid_->create_transform(
892 spfft_pu, fft_type, fft_grid_[0], fft_grid_[1], fft_grid_[2],
893 spl_z.local_size(), gvec_fft_->count(), SPFFT_INDEX_TRIPLETS, gv.at(sddk::memory_t::host))));
894#if defined(SIRIUS_USE_FP32)
895 spfft_transform_float_.reset(new spfft::TransformFloat(spfft_grid_float_->create_transform(
896 spfft_pu, fft_type, fft_grid_[0], fft_grid_[1], fft_grid_[2], spl_z.local_size(),
897 gvec_fft_->count(), SPFFT_INDEX_TRIPLETS, gv.at(sddk::memory_t::host))));
898#endif
899
900 /* copy G-vectors to GPU; this is done once because Miller indices of G-vectors
901 do not change during the execution */
902 switch (this->processing_unit()) {
903 case sddk::device_t::CPU: {
904 break;
905 }
906 case sddk::device_t::GPU: {
907 gvec_coord_ = sddk::mdarray<int, 2>(gvec().count(), 3, sddk::memory_t::host, "gvec_coord_");
908 #pragma omp parallel for schedule(static)
909 for (int igloc = 0; igloc < gvec().count(); igloc++) {
910 auto G = gvec().gvec<index_domain_t::local>(igloc);
911 for (int x : {0, 1, 2}) {
912 gvec_coord_(igloc, x) = G[x];
913 }
914 }
915 gvec_coord_.allocate(sddk::memory_t::device).copy_to(sddk::memory_t::device);
916 break;
917 }
918 }
919 } else {
920 gvec_->lattice_vectors(rlv);
921 }
922
923 /* After each update of the lattice vectors we might get a different set of G-vector shells.
924 * Always update the mapping between the canonical FFT distribution and "local G-shells"
925 * distribution which is used in symmetriezation of lattice periodic functions. */
926 remap_gvec_ = std::make_unique<fft::Gvec_shells>(gvec());
927
928 /* check symmetry of G-vectors */
929 if (unit_cell().num_atoms() != 0 && use_symmetry() && cfg().control().verification() >= 1) {
930 check_gvec(gvec(), unit_cell().symmetry());
931 if (!full_potential()) {
932 check_gvec(gvec_coarse(), unit_cell().symmetry());
933 }
934 check_gvec(*remap_gvec_, unit_cell().symmetry());
935 }
936
937 /* check if FFT grid is OK; this check is especially needed if the grid is set as external parameter */
938 if (cfg().control().verification() >= 0) {
939 #pragma omp parallel for
940 for (int igloc = 0; igloc < gvec().count(); igloc++) {
941 int ig = gvec().offset() + igloc;
942
943 auto gv = gvec().gvec<index_domain_t::local>(igloc);
944 /* check limits */
945 for (int x : {0, 1, 2}) {
946 auto limits = fft_grid().limits(x);
947 /* check boundaries */
948 if (gv[x] < limits.first || gv[x] > limits.second) {
949 std::stringstream s;
950 s << "G-vector is outside of grid limits\n"
951 << " G: " << gv << ", length: " << gvec().gvec_cart<index_domain_t::global>(ig).length() << "\n"
952 << " FFT grid limits: " << fft_grid().limits(0).first << " " << fft_grid().limits(0).second
953 << " " << fft_grid().limits(1).first << " " << fft_grid().limits(1).second << " "
954 << fft_grid().limits(2).first << " " << fft_grid().limits(2).second << "\n"
955 << " FFT grid is not compatible with G-vector cutoff (" << this->pw_cutoff() << ")";
956 RTE_THROW(s);
957 }
958 }
959 }
960 }
961
962 if (unit_cell().num_atoms()) {
963 init_atoms_to_grid_idx(cfg().control().rmt_max());
964 }
965
966 std::pair<int, int> limits(0, 0);
967 for (int x : {0, 1, 2}) {
968 limits.first = std::min(limits.first, fft_grid().limits(x).first);
969 limits.second = std::max(limits.second, fft_grid().limits(x).second);
970 }
971
972 /* recompute phase factors for atoms */
973 phase_factors_ = sddk::mdarray<std::complex<double>, 3>(3, limits, unit_cell().num_atoms(), sddk::memory_t::host, "phase_factors_");
974 #pragma omp parallel for
975 for (int i = limits.first; i <= limits.second; i++) {
976 for (int ia = 0; ia < unit_cell().num_atoms(); ia++) {
977 auto pos = unit_cell().atom(ia).position();
978 for (int x : {0, 1, 2}) {
979 phase_factors_(x, i, ia) = std::exp(std::complex<double>(0.0, twopi * (i * pos[x])));
980 }
981 }
982 }
983
984 /* recompute phase factors for atom types */
985 phase_factors_t_ = sddk::mdarray<std::complex<double>, 2>(gvec().count(), unit_cell().num_atom_types());
986 #pragma omp parallel for schedule(static)
987 for (int igloc = 0; igloc < gvec().count(); igloc++) {
988 /* global index of G-vector */
989 int ig = gvec().offset() + igloc;
990 for (int iat = 0; iat < unit_cell().num_atom_types(); iat++) {
991 std::complex<double> z(0, 0);
992 for (int ia = 0; ia < unit_cell().atom_type(iat).num_atoms(); ia++) {
993 z += gvec_phase_factor(ig, unit_cell().atom_type(iat).atom_id(ia));
994 }
995 phase_factors_t_(igloc, iat) = z;
996 }
997 }
998
999 if (use_symmetry()) {
1000 sym_phase_factors_ = sddk::mdarray<std::complex<double>, 3>(3, limits, unit_cell().symmetry().size());
1001
1002 #pragma omp parallel for
1003 for (int i = limits.first; i <= limits.second; i++) {
1004 for (int isym = 0; isym < unit_cell().symmetry().size(); isym++) {
1005 auto t = unit_cell().symmetry()[isym].spg_op.t;
1006 for (int x : {0, 1, 2}) {
1007 sym_phase_factors_(x, i, isym) = std::exp(std::complex<double>(0.0, twopi * (i * t[x])));
1008 }
1009 }
1010 }
1011 }
1012
1013 switch (this->processing_unit()) {
1014 case sddk::device_t::CPU: {
1015 break;
1016 }
1017 case sddk::device_t::GPU: {
1018 gvec_->gvec_tp().allocate(sddk::memory_t::device).copy_to(sddk::memory_t::device);
1019 break;
1020 }
1021 }
1022
1023 /* create or update radial integrals */
1024 if (!full_potential()) {
1025 /* find the new maximum length of G-vectors */
1026 double new_pw_cutoff{this->pw_cutoff()};
1027 for (int igloc = 0; igloc < gvec().count(); igloc++) {
1028 new_pw_cutoff = std::max(new_pw_cutoff, gvec().gvec_len<index_domain_t::local>(igloc));
1029 }
1030 gvec().comm().allreduce<double, mpi::op_t::max>(&new_pw_cutoff, 1);
1031 /* estimate new G+k-vectors cutoff */
1032 double new_gk_cutoff = this->gk_cutoff();
1033 if (new_pw_cutoff > this->pw_cutoff()) {
1034 new_gk_cutoff += (new_pw_cutoff - this->pw_cutoff());
1035 }
1036
1037 /* radial integrals with pw_cutoff */
1038 if (!ri_.aug_ || ri_.aug_->qmax() < new_pw_cutoff) {
1039 ri_.aug_ = std::make_unique<Radial_integrals_aug<false>>(unit_cell(), new_pw_cutoff,
1040 cfg().settings().nprii_aug(), cb_.aug_ri_);
1041 }
1042
1043 if (!ri_.aug_djl_ || ri_.aug_djl_->qmax() < new_pw_cutoff) {
1044 ri_.aug_djl_ = std::make_unique<Radial_integrals_aug<true>>(unit_cell(), new_pw_cutoff,
1045 cfg().settings().nprii_aug(), cb_.aug_ri_djl_);
1046 }
1047
1048 if (!ri_.ps_core_ || ri_.ps_core_->qmax() < new_pw_cutoff) {
1049 ri_.ps_core_ = std::make_unique<Radial_integrals_rho_core_pseudo<false>>(unit_cell(),
1050 new_pw_cutoff, cfg().settings().nprii_rho_core(), cb_.rhoc_ri_);
1051 }
1052
1053 if (!ri_.ps_core_djl_ || ri_.ps_core_djl_->qmax() < new_pw_cutoff) {
1055 std::make_unique<Radial_integrals_rho_core_pseudo<true>>(unit_cell(), new_pw_cutoff,
1056 cfg().settings().nprii_rho_core(), cb_.rhoc_ri_djl_);
1057 }
1058
1059 if (!ri_.ps_rho_ || ri_.ps_rho_->qmax() < new_pw_cutoff) {
1060 ri_.ps_rho_ = std::make_unique<Radial_integrals_rho_pseudo>(unit_cell(), new_pw_cutoff, 20,
1061 cb_.ps_rho_ri_);
1062 }
1063
1064 if (!ri_.vloc_ || ri_.vloc_->qmax() < new_pw_cutoff) {
1065 ri_.vloc_ = std::make_unique<Radial_integrals_vloc<false>>(unit_cell(), new_pw_cutoff,
1066 cfg().settings().nprii_vloc(), cb_.vloc_ri_);
1067 }
1068
1069 if (!ri_.vloc_djl_ || ri_.vloc_djl_->qmax() < new_pw_cutoff) {
1070 ri_.vloc_djl_ = std::make_unique<Radial_integrals_vloc<true>>(unit_cell(), new_pw_cutoff,
1071 cfg().settings().nprii_vloc(), cb_.vloc_ri_djl_);
1072 }
1073
1074 /* radial integrals with pw_cutoff */
1075 if (!ri_.beta_ || ri_.beta_->qmax() < new_gk_cutoff) {
1076 ri_.beta_ = std::make_unique<Radial_integrals_beta<false>>(unit_cell(), new_gk_cutoff,
1077 cfg().settings().nprii_beta(), cb_.beta_ri_);
1078 }
1079
1080 if (!ri_.beta_djl_ || ri_.beta_djl_->qmax() < new_gk_cutoff) {
1081 ri_.beta_djl_ = std::make_unique<Radial_integrals_beta<true>>(unit_cell(), new_gk_cutoff,
1082 cfg().settings().nprii_beta(), cb_.beta_ri_djl_);
1083 }
1084
1085 auto idxr_wf = [&](int iat) -> radial_functions_index const& {
1086 return unit_cell().atom_type(iat).indexr_wfs();
1087 };
1088
1089 auto ps_wf = [&](int iat, int i) -> Spline<double> const& {
1090 return unit_cell().atom_type(iat).ps_atomic_wf(i).f;
1091 };
1092
1093 if (!ri_.ps_atomic_wf_ || ri_.ps_atomic_wf_->qmax() < new_gk_cutoff) {
1094 ri_.ps_atomic_wf_ = std::make_unique<Radial_integrals_atomic_wf<false>>(unit_cell(), new_gk_cutoff, 20,
1095 idxr_wf, ps_wf, cb_.ps_atomic_wf_ri_);
1096 }
1097
1098 if (!ri_.ps_atomic_wf_djl_ || ri_.ps_atomic_wf_djl_->qmax() < new_gk_cutoff) {
1099 ri_.ps_atomic_wf_djl_ = std::make_unique<Radial_integrals_atomic_wf<true>>(unit_cell(), new_gk_cutoff,
1100 20, idxr_wf, ps_wf, cb_.ps_atomic_wf_ri_djl_);
1101 }
1102
1103 for (int iat = 0; iat < unit_cell().num_atom_types(); iat++) {
1104 if (unit_cell().atom_type(iat).augment() && unit_cell().atom_type(iat).num_atoms() > 0) {
1105 augmentation_op_[iat] = std::make_unique<Augmentation_operator>(unit_cell().atom_type(iat), gvec(),
1106 *ri_.aug_, *ri_.aug_djl_);
1107 augmentation_op_[iat]->generate_pw_coeffs();
1108 } else {
1109 augmentation_op_[iat] = nullptr;
1110 }
1111 }
1112 }
1113
1114 if (full_potential()) {
1115 theta_ = init_step_function(this->unit_cell(), this->gvec(), this->gvec_fft(), this->phase_factors_t(),
1116 *this->spfft_transform_);
1117 }
1118
1119 auto save_config = env::save_config();
1120 if (save_config.size() && this->comm().rank() == 0) {
1121 std::string name;
1122 if (save_config == "all") {
1123 static int count{0};
1124 std::stringstream s;
1125 s << "sirius" << std::setfill('0') << std::setw(6) << count << ".json";
1126 name = s.str();
1127 count++;
1128 } else {
1129 name = save_config;
1130 }
1131 std::ofstream fi(name, std::ofstream::out | std::ofstream::trunc);
1132 auto conf_dict = this->serialize();
1133 fi << conf_dict.dump(4);
1134 }
1135}
1136
1137void
1138Simulation_context::create_storage_file(std::string name__) const
1139{
1140 if (comm_.rank() == 0) {
1141 /* create new hdf5 file */
1142 HDF5_tree fout(name__, hdf5_access_t::truncate);
1143 fout.create_node("parameters");
1144 fout.create_node("effective_potential");
1145 fout.create_node("effective_magnetic_field");
1146 fout.create_node("density");
1147 fout.create_node("magnetization");
1148
1149 for (int j = 0; j < num_mag_dims(); j++) {
1150 fout["magnetization"].create_node(j);
1151 fout["effective_magnetic_field"].create_node(j);
1152 }
1153
1154 fout["parameters"].write("num_spins", num_spins());
1155 fout["parameters"].write("num_mag_dims", num_mag_dims());
1156 fout["parameters"].write("num_bands", num_bands());
1157
1158 sddk::mdarray<int, 2> gv(3, gvec().num_gvec());
1159 for (int ig = 0; ig < gvec().num_gvec(); ig++) {
1160 auto G = gvec().gvec<index_domain_t::global>(ig);
1161 for (int x : {0, 1, 2}) {
1162 gv(x, ig) = G[x];
1163 }
1164 }
1165 fout["parameters"].write("num_gvec", gvec().num_gvec());
1166 fout["parameters"].write("gvec", gv);
1167
1168 fout.create_node("unit_cell");
1169 fout["unit_cell"].create_node("atoms");
1170 for (int j = 0; j < unit_cell().num_atoms(); j++) {
1171 fout["unit_cell"]["atoms"].create_node(j);
1172 fout["unit_cell"]["atoms"][j].write("mt_basis_size", unit_cell().atom(j).mt_basis_size());
1173 }
1174 }
1175 comm_.barrier();
1176}
1177
1178void
1179Simulation_context::generate_phase_factors(int iat__, sddk::mdarray<std::complex<double>, 2>& phase_factors__) const
1180{
1181 PROFILE("sirius::Simulation_context::generate_phase_factors");
1182 int na = unit_cell().atom_type(iat__).num_atoms();
1183 switch (processing_unit_) {
1184 case sddk::device_t::CPU: {
1185 #pragma omp parallel for
1186 for (int igloc = 0; igloc < gvec().count(); igloc++) {
1187 const int ig = gvec().offset() + igloc;
1188 for (int i = 0; i < na; i++) {
1189 int ia = unit_cell().atom_type(iat__).atom_id(i);
1190 phase_factors__(igloc, i) = gvec_phase_factor(ig, ia);
1191 }
1192 }
1193 break;
1194 }
1195 case sddk::device_t::GPU: {
1196#if defined(SIRIUS_GPU)
1197 generate_phase_factors_gpu(gvec().count(), na, gvec_coord().at(sddk::memory_t::device),
1198 unit_cell().atom_coord(iat__).at(sddk::memory_t::device),
1199 phase_factors__.at(sddk::memory_t::device));
1200#endif
1201 break;
1202 }
1203 }
1204}
1205
1206void
1208{
1209 PROFILE("sirius::Simulation_context::init_atoms_to_grid_idx");
1210
1211 auto Rmt = unit_cell().find_mt_radii(1, true);
1212
1213 double R{0};
1214 for (auto e : Rmt) {
1215 R = std::max(e, R);
1216 }
1217
1218 //double R = R__;
1219
1220 atoms_to_grid_idx_.resize(unit_cell().num_atoms());
1221
1222 r3::vector<double> delta(1.0 / spfft<double>().dim_x(), 1.0 / spfft<double>().dim_y(), 1.0 / spfft<double>().dim_z());
1223
1224 const int z_off = spfft<double>().local_z_offset();
1225 r3::vector<int> grid_beg(0, 0, z_off);
1226 r3::vector<int> grid_end(spfft<double>().dim_x(), spfft<double>().dim_y(), z_off + spfft<double>().local_z_length());
1227 std::vector<r3::vector<double>> verts_cart{{-R, -R, -R}, {R, -R, -R}, {-R, R, -R}, {R, R, -R},
1228 {-R, -R, R}, {R, -R, R}, {-R, R, R}, {R, R, R}};
1229
1230 auto bounds_box = [&](r3::vector<double> pos) {
1231 std::vector<r3::vector<double>> verts;
1232
1233 /* pos is a position of atom */
1234 for (auto v : verts_cart) {
1235 verts.push_back(pos + unit_cell().get_fractional_coordinates(v));
1236 }
1237
1238 std::pair<r3::vector<int>, r3::vector<int>> bounds_ind;
1239
1240 for (int x : {0, 1, 2}) {
1241 std::sort(verts.begin(), verts.end(),
1242 [x](r3::vector<double>& a, r3::vector<double>& b) { return a[x] < b[x]; });
1243 bounds_ind.first[x] = std::max(static_cast<int>(verts[0][x] / delta[x]) - 1, grid_beg[x]);
1244 bounds_ind.second[x] = std::min(static_cast<int>(verts[5][x] / delta[x]) + 1, grid_end[x]);
1245 }
1246
1247 return bounds_ind;
1248 };
1249
1250 #pragma omp parallel for
1251 for (int ia = 0; ia < unit_cell().num_atoms(); ia++) {
1252
1253 std::vector<std::pair<int, double>> atom_to_ind_map;
1254
1255 for (int t0 = -1; t0 <= 1; t0++) {
1256 for (int t1 = -1; t1 <= 1; t1++) {
1257 for (int t2 = -1; t2 <= 1; t2++) {
1258 auto pos = unit_cell().atom(ia).position() + r3::vector<double>(t0, t1, t2);
1259
1260 /* find the small box around this atom */
1261 auto box = bounds_box(pos);
1262
1263 for (int j0 = box.first[0]; j0 < box.second[0]; j0++) {
1264 for (int j1 = box.first[1]; j1 < box.second[1]; j1++) {
1265 for (int j2 = box.first[2]; j2 < box.second[2]; j2++) {
1266 auto v = pos - r3::vector<double>(delta[0] * j0, delta[1] * j1, delta[2] * j2);
1267 auto r = unit_cell().get_cartesian_coordinates(v).length();
1268 if (r < Rmt[unit_cell().atom(ia).type_id()]) {
1269 auto ir = fft_grid_.index_by_coord(j0, j1, j2 - z_off);
1270 atom_to_ind_map.push_back({ir, r});
1271 }
1272 }
1273 }
1274 }
1275 }
1276 }
1277 }
1278
1279 atoms_to_grid_idx_[ia] = std::move(atom_to_ind_map);
1280 }
1281}
1282
1283void
1285{
1286 PROFILE("sirius::Simulation_context::init_comm");
1287
1288 /* check MPI grid dimensions and set a default grid if needed */
1289 if (!cfg().control().mpi_grid_dims().size()) {
1290 mpi_grid_dims({1, 1});
1291 }
1292 if (cfg().control().mpi_grid_dims().size() != 2) {
1293 RTE_THROW("wrong MPI grid");
1294 }
1295
1296 const int npr = cfg().control().mpi_grid_dims()[0];
1297 const int npc = cfg().control().mpi_grid_dims()[1];
1298 const int npb = npr * npc;
1299 if (npb <= 0) {
1300 std::stringstream s;
1301 s << "wrong mpi grid dimensions : " << npr << " " << npc;
1302 RTE_THROW(s);
1303 }
1304 const int npk = comm_.size() / npb;
1305 if (npk * npb != comm_.size()) {
1306 std::stringstream s;
1307 s << "Can't divide " << comm_.size() << " ranks into groups of size " << npb;
1308 RTE_THROW(s);
1309 }
1310
1311 /* create k- and band- communicators */
1312 if (comm_k_.is_null() && comm_band_.is_null()) {
1313 comm_band_ = comm_.split(comm_.rank() / npb);
1314 comm_k_ = comm_.split(comm_.rank() % npb);
1315 }
1316
1317 /* setup MPI grid */
1318 mpi_grid_ = std::make_unique<mpi::Grid>(std::vector<int>({npr, npc}), comm_band_);
1319
1320 /* here we know the number of ranks for band parallelization */
1321
1322 /* if we have multiple ranks per node and band parallelization, switch to parallel FFT for coarse mesh */
1323 if ((npr == npb) || (mpi::num_ranks_per_node() > acc::num_devices() && comm_band().size() > 1)) {
1324 cfg().control().fft_mode("parallel");
1325 }
1326
1327 /* create communicator, orthogonal to comm_fft_coarse */
1328 comm_ortho_fft_coarse_ = comm().split(comm_fft_coarse().rank());
1329}
1330
1331} // namespace sirius
Check G-vector symmetry.
Interface to the HDF5 library.
Definition: hdf5_tree.hpp:86
std::unique_ptr< spfft::Transform > spfft_transform_
Fine-grained FFT for density and potential.
auto gvec_phase_factor(r3::vector< int > G__, int ia__) const
Phase factors .
std::vector< std::vector< std::pair< int, double > > > atoms_to_grid_idx_
List of real-space point indices for each of the atoms.
sddk::mdarray< int, 2 > gvec_coord_
Lattice coordinats of G-vectors in a GPU-friendly ordering.
mpi::Communicator const & comm_
Communicator for this simulation.
nlohmann::json serialize()
Export parameters of simulation context as a JSON dictionary.
void init_atoms_to_grid_idx(double R__)
Find a list of real-space grid points around each atom.
mpi::Communicator comm_ortho_fft_coarse_
Auxiliary communicator for the coarse-grid FFT transformation.
timeval start_time_
Creation time of the parameters.
auto const & gvec() const
Return const reference to Gvec object.
auto const & comm_band() const
Band parallelization communicator.
double omega0_
Volume of the initial unit cell.
auto const & comm_fft_coarse() const
Communicator of the coarse FFT grid.
sddk::mdarray< std::complex< double >, 3 > sym_phase_factors_
1D phase factors of the symmetry operations.
void update()
Update context after setting new lattice vectors or atomic coordinates.
std::shared_ptr< fft::Gvec > gvec_coarse_
G-vectors within the 2 * |Gmax^{WF}| cutoff.
void init_comm()
Initialize communicators.
std::shared_ptr< fft::Gvec > gvec_
G-vectors within the Gmax cutoff.
std::vector< std::unique_ptr< Augmentation_operator > > augmentation_op_
Augmentation operator for each atom type.
void init_fft_grid()
Initialize FFT coarse and fine grids.
callback_functions_t cb_
Stores all callback functions.
void initialize()
Initialize the similation (can only be called once).
sddk::mdarray< std::complex< double >, 3 > phase_factors_
1D phase factors for each atom coordinate and G-vector index.
std::unique_ptr< la::BLACS_grid > blacs_grid_
2D BLACS grid for distributed linear algebra operations.
auto const & comm_fft() const
Communicator of the dense FFT grid.
auto const & comm_k() const
Communicator between k-points.
std::shared_ptr<::spla::Context > spla_ctx_
SPLA library context.
void generate_phase_factors(int iat__, sddk::mdarray< std::complex< double >, 2 > &phase_factors__) const
Generate phase factors for all atoms of a given type.
std::unique_ptr< la::Eigensolver > std_evp_solver_
Standard eigen-value problem solver.
sddk::memory_t host_memory_t_
Type of host memory (pagable or page-locked) for the arrays that participate in host-to-device memory...
auto const & gvec_fft() const
Return const reference to Gvec_fft object.
std::unique_ptr< spfft::Transform > spfft_transform_coarse_
Coarse-grained FFT for application of local potential and density summation.
r3::matrix< double > lattice_vectors0_
Initial lattice vectors.
std::ostream & out() const
Return output stream.
sddk::mdarray< std::complex< double >, 2 > phase_factors_t_
Phase factors for atom types.
double ewald_lambda() const
Find the lambda parameter used in the Ewald summation.
std::unique_ptr< la::Eigensolver > gen_evp_solver_
Generalized eigen-value problem solver.
mpi::Communicator const & comm() const
Total communicator of the simulation.
fft::Grid fft_grid_
Grid descriptor for the fine-grained FFT transform.
bool initialized_
True if the context is already initialized.
std::unique_ptr< mpi::Grid > mpi_grid_
MPI grid for this simulation.
step_function_t theta_
Step function in real-space and reciprocal domains.
auto const & comm_band_ortho_fft_coarse() const
Communicator, which is orthogonal to comm_fft_coarse within a band communicator.
radial_integrals_t ri_
Stores all radial integrals.
fft::Grid fft_coarse_grid_
Grid descriptor for the coarse-grained FFT transform.
sddk::device_t processing_unit_
Type of the processing unit.
std::vector< int > mpi_grid_dims(std::vector< int > mpi_grid_dims__)
Set dimensions of MPI grid for band diagonalization problem.
int num_spins() const
Number of spin components.
bool gamma_point(bool gamma_point__)
Set flag for Gamma-point calculation.
int num_spinors() const
Number of spinor wave-functions labeled by a sinlge band index.
relativity_t valence_relativity_
Type of relativity for valence states.
void core_relativity(std::string name__)
Set core relativity for the LAPW method.
int num_bands() const
Total number of bands.
int num_spinor_comp() const
Number of non-zero spinor components.
int num_mag_dims() const
Number of dimensions in the magnetization vector.
double pw_cutoff() const
Plane-wave cutoff for G-vectors (in 1/[a.u.]).
void valence_relativity(std::string name__)
Set valence relativity for the LAPW method.
bool use_symmetry() const
Get a use_symmetry flag.
double gk_cutoff() const
Cutoff for G+k vectors (in 1/[a.u.]).
relativity_t core_relativity_
Type of relativity for core states.
int num_fv_states() const
Number of first-variational states.
std::string gen_evp_solver_name() const
Get the name of the generalized eigen-value solver to use.
std::string std_evp_solver_name() const
Get the name of the standard eigen-value solver to use.
Helper class to create FFT grids of given sizes and compute indices in space- and frequency domains.
Definition: fft3d_grid.hpp:38
int index_by_coord(int x__, int y__, int z__) const
Linear index inside FFT buffer by grid coordinates.
Definition: fft3d_grid.hpp:147
Horisontal bar.
int size() const
Size of the communicator (number of ranks).
int rank() const
Rank of MPI process inside communicator.
Parallel standard output.
Definition: pstdout.hpp:42
Radial basis function index.
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
Definition: memory.hpp:1057
Contains definition and partial implementation of sirius::Crystal_symmetry class.
Get the environment variables.
Crystal lattice functions.
Interface to SPLA library.
int num_devices()
Get the number of devices.
Definition: acc.cpp:32
auto get_min_grid(double cutoff__, r3::matrix< double > M__)
Get the minimum grid that circumscribes the cutoff sphere.
Definition: fft3d_grid.hpp:161
auto split_z_dimension(int size_z__, mpi::Communicator const &comm_fft__)
Split z-dimenstion of size_z between MPI ranks of the FFT communicator.
Definition: fft.hpp:236
ev_solver_t
Type of eigen-value solver.
Definition: eigensolver.hpp:37
@ dlaf
DLA-Future solver.
@ magma_gpu
MAGMA with GPU pointers.
@ cusolver
CUDA eigen-solver.
@ magma
MAGMA with CPU pointers.
int num_ranks_per_node()
Get number of ranks per node.
Namespace of the SIRIUS library.
Definition: sirius.f90:5
relativity_t
Type of relativity treatment in the case of LAPW.
Definition: typedefs.hpp:95
auto split(std::string const str__, char delim__)
Split multi-line string into a list of strings.
@ global
Global index.
auto init_step_function(Unit_cell const &uc__, fft::Gvec const &gv__, fft::Gvec_fft const &gvec_fft__, sddk::mdarray< std::complex< double >, 2 > const &phase_factors_t__, fft::spfft_transform_type< double > spfft__)
Unit step function is defined to be 1 in the interstitial and 0 inside muffin-tins.
auto hostname()
Get host name.
const double twopi
Definition: constants.hpp:45
const double fourpi
Definition: constants.hpp:48
Add or substitute OMP functions.
A time-based profiler.
Contains definition and implementation of Simulation_context class.
Get version number and related quantities.
Generate unit step function for LAPW method.
std::function< void(int, int, double *, double *)> ps_rho_ri_
Callback function provided by the host code to compute radial integrals of pseudo charge density.
std::function< void(int, int, double *, double *)> rhoc_ri_djl_
std::function< void(int, double, double *, int)> ps_atomic_wf_ri_
Callback function provided by the host code to compute radial integrals of pseudo atomic wave-functio...
std::function< void(int, double, double *, int)> beta_ri_djl_
std::function< void(int, double, double *, int)> beta_ri_
Callback function provided by the host code to compute radial integrals of beta projectors.
std::function< void(int, double, double *, int, int)> aug_ri_
Callback function provided by the host code to compute radial integrals of augmentation operator.
std::function< void(int, double, double *, int)> ps_atomic_wf_ri_djl_
std::function< void(int, int, double *, double *)> vloc_ri_djl_
Callback function to compute radial integrals of local potential with derivatives of spherical Bessel...
std::function< void(int, int, double *, double *)> rhoc_ri_
Callback function provided by the host code to compute radial integrals of pseudo core charge density...
std::function< void(int, int, double *, double *)> vloc_ri_
Callback function to compute radial integrals of local potential.
std::function< void(int, double, double *, int, int)> aug_ri_djl_
std::unique_ptr< Radial_integrals_beta< true > > beta_djl_
Radial integrals of beta-projectors with derivatives of spherical Bessel functions.
std::unique_ptr< Radial_integrals_rho_pseudo > ps_rho_
Radial integrals of total pseudo-charge density.
std::unique_ptr< Radial_integrals_rho_core_pseudo< true > > ps_core_djl_
Radial integrals of pseudo-core charge density with derivatives of spherical Bessel functions.
std::unique_ptr< Radial_integrals_rho_core_pseudo< false > > ps_core_
Radial integrals of pseudo-core charge density.
std::unique_ptr< Radial_integrals_aug< true > > aug_djl_
Radial integrals of augmentation operator with derivatives of spherical Bessel functions.
std::unique_ptr< Radial_integrals_vloc< true > > vloc_djl_
Radial integrals of the local part of pseudopotential with derivatives of spherical Bessel functions.
std::unique_ptr< Radial_integrals_aug< false > > aug_
Radial integrals of augmentation operator.
std::unique_ptr< Radial_integrals_atomic_wf< true > > ps_atomic_wf_djl_
Radial integrals of atomic wave-functions with derivatives of spherical Bessel functions.
std::unique_ptr< Radial_integrals_atomic_wf< false > > ps_atomic_wf_
Radial integrals of atomic wave-functions.
std::unique_ptr< Radial_integrals_beta< false > > beta_
Radial integrals of beta-projectors.
std::unique_ptr< Radial_integrals_vloc< false > > vloc_
Radial integrals of the local part of pseudopotential.
Contains implementation of sirius::XC_functional class.