29template <
typename T, sync_band_t what>
32 PROFILE(
"sirius::K_point_set::sync_band");
35 get_memory_pool(sddk::memory_t::host),
"K_point_set::sync_band.data");
41 auto kp = this->get<T>(it.i);
43 case sync_band_t::energy: {
44 std::copy(&kp->band_energies_(0, 0), &kp->band_energies_(0, 0) + nb, &data(0, 0, it.i));
47 case sync_band_t::occupancy: {
48 std::copy(&kp->band_occupancies_(0, 0), &kp->band_occupancies_(0, 0) + nb, &data(0, 0, it.i));
54 comm().allreduce(data.at(sddk::memory_t::host),
static_cast<int>(data.
size()));
56 #pragma omp parallel for
58 auto kp = this->get<T>(ik);
60 case sync_band_t::energy: {
61 std::copy(&data(0, 0, ik), &data(0, 0, ik) + nb, &kp->band_energies_(0, 0));
64 case sync_band_t::occupancy: {
65 std::copy(&data(0, 0, ik), &data(0, 0, ik) + nb, &kp->band_occupancies_(0, 0));
74K_point_set::sync_band<double, sync_band_t::energy>();
78K_point_set::sync_band<double, sync_band_t::occupancy>();
80#if defined(SIRIUS_USE_FP32)
83K_point_set::sync_band<float, sync_band_t::energy>();
87K_point_set::sync_band<float, sync_band_t::occupancy>();
92 PROFILE(
"sirius::K_point_set::create_k_mesh");
96 std::vector<double> wk;
98 auto result = get_irreducible_reciprocal_mesh(
ctx_.unit_cell().symmetry(), k_grid__, k_shift__);
99 nk = std::get<0>(result);
100 wk = std::get<1>(result);
101 auto tmp = std::get<2>(result);
103 for (
int i = 0; i < nk; i++) {
104 for (
int x : {0, 1, 2}) {
105 kp(x, i) = tmp[i][x];
109 nk = k_grid__[0] * k_grid__[1] * k_grid__[2];
110 wk = std::vector<double>(nk, 1.0 / nk);
114 for (
int i0 = 0; i0 < k_grid__[0]; i0++) {
115 for (
int i1 = 0; i1 < k_grid__[1]; i1++) {
116 for (
int i2 = 0; i2 < k_grid__[2]; i2++) {
117 kp(0, ik) = double(i0 + k_shift__[0] / 2.0) / k_grid__[0];
118 kp(1, ik) = double(i1 + k_shift__[1] / 2.0) / k_grid__[1];
119 kp(2, ik) = double(i2 + k_shift__[2] / 2.0) / k_grid__[2];
126 for (
int ik = 0; ik < nk; ik++) {
135 if (this->initialized_) {
136 RTE_THROW(
"K-point set is already initialized");
138 PROFILE(
"sirius::K_point_set::initialize");
140 if (counts.empty()) {
143 block_id(comm().rank()), spl_tmp.counts());
151#if defined(SIRIUS_USE_FP32)
152 kpoints_float_[it.i]->initialize();
156 if (
ctx_.verbosity() > 0) {
159 print_memory_usage(
ctx_.
out(), FILE_LINE);
160 this->initialized_ =
true;
165double bisection_search(F&& f,
double a,
double b,
double tol,
int maxstep=1000)
171 while (std::abs(fi) >= tol) {
183 if (step > maxstep) {
185 s <<
"search of band occupancies failed after 10000 steps";
205template <
class Nt,
class DNt,
class D2Nt>
211 const double tol_ne = 1e-2;
216 std::vector<double> ys;
223 if ( std::abs(N(mu) - ne) < tol) {
234 double ddNf = ddN(mu);
237 double dF = 2 * (Nf - ne) * dNf;
238 double ddF = 2 * dNf * dNf + 2 * (Nf - ne) * ddNf;
239 double step = alpha * dF / std::abs(ddF);
242 res.ys.push_back(mu);
244 if (std::abs(ddF) < 1e-30) {
246 s <<
"Newton minimization (Fermi energy) failed because 2nd derivative too close to zero!"
247 << std::setprecision(8) << std::abs(Nf - ne) <<
"\n";
251 if (std::abs(step) < tol || std::abs(Nf - ne) < tol) {
252 if (std::abs(Nf - ne) > tol_ne) {
254 s <<
"Newton minimization (Fermi energy) got stuck in a local minimum. Fallback to bisection search." <<
"\n";
264 if (iter > maxstep) {
266 s <<
"Newton minimization (chemical potential) failed after " << maxstep <<
" steps!" << std::endl
267 <<
"target number of electrons : " << ne << std::endl
268 <<
"initial guess for chemical potential : " << mu0 << std::endl
269 <<
"current value of chemical potential : " << mu;
278 PROFILE(
"sirius::K_point_set::find_band_occupancies");
282 auto band_occ_callback =
ctx_.band_occ_callback();
283 if (band_occ_callback) {
289 const double ne_target =
ctx_.unit_cell().num_valence_electrons() -
ctx_.cfg().parameters().extra_charge();
307 #pragma omp parallel for
314 this->sync_band<T, sync_band_t::occupancy>();
318 if (
ctx_.smearing_width() == 0) {
319 RTE_THROW(
"zero smearing width");
324 auto emin = std::numeric_limits<double>::max();
325 auto emax = std::numeric_limits<double>::lowest();
327 #pragma omp parallel for reduction(min:emin) reduction(max:emax)
330 emin = std::min(emin, this->get<T>(it.i)->band_energy(0, ispn));
331 emax = std::max(emax, this->get<T>(it.i)->band_energy(
ctx_.
num_bands() - 1, ispn));
334 comm().allreduce<double, mpi::op_t::min>(&emin, 1);
335 comm().allreduce<double, mpi::op_t::max>(&emax, 1);
340 auto compute_ne = [&](
double ef,
auto&& f) {
344 #pragma omp parallel reduction(+ : tmp)
347 for (
int j = 0; j < splb.local_size(); j++) {
348 tmp += f(ef - this->get<T>(it.i)->band_energy(splb.global_index(j), ispn)) *
ctx_.
max_occupancy();
351 ne += tmp *
kpoints_[it.i]->weight();
358 std::function<double(
double)> f;
359 if (
ctx_.smearing() == smearing::smearing_t::cold ||
ctx_.smearing() == smearing::smearing_t::methfessel_paxton) {
361 f = [&](
double x) {
return smearing::gaussian::occupancy(x,
ctx_.smearing_width()); };
363 f = smearing::occupancy(
ctx_.smearing(),
ctx_.smearing_width());
367 auto F = [&compute_ne, ne_target, &f](
double x) {
return compute_ne(x, f) - ne_target; };
371 if (
ctx_.smearing() == smearing::smearing_t::cold ||
ctx_.smearing() == smearing::smearing_t::methfessel_paxton) {
372 f = smearing::occupancy(
ctx_.smearing(),
ctx_.smearing_width());
373 auto df = smearing::delta(
ctx_.smearing(),
ctx_.smearing_width());
374 auto ddf = smearing::dxdelta(
ctx_.smearing(),
ctx_.smearing_width());
375 auto N = [&](
double mu) {
return compute_ne(mu, f); };
376 auto dN = [&](
double mu) {
return compute_ne(mu, df); };
377 auto ddN = [&](
double mu) {
return compute_ne(mu, ddf); };
380 if (
ctx_.verbosity() >= 2) {
381 RTE_OUT(
ctx_.
out()) <<
"newton iteration converged after " << res_newton.iter <<
" steps\n";
384 }
catch(std::exception
const& e) {
385 if (
ctx_.verbosity() >= 2) {
386 RTE_OUT(
ctx_.
out()) << e.what() << std::endl
387 <<
"fallback to bisection search" << std::endl;
389 f = smearing::occupancy(
ctx_.smearing(),
ctx_.smearing_width());
390 auto F = [&compute_ne, ne_target, &f](
double x) {
return compute_ne(x, f) - ne_target; };
396 #pragma omp parallel for
399 this->get<T>(it.i)->band_occupancy(j, ispn, o);
404 this->sync_band<T, sync_band_t::occupancy>();
408 int nve =
static_cast<int>(ne_target + 1e-12);
409 if (
ctx_.
num_spins() == 2 || (std::abs(nve - ne_target) < 1e-12 && nve % 2 == 0)) {
416 std::pair<double, double> eminmax;
417 eminmax.first = std::numeric_limits<double>::max();
418 eminmax.second = std::numeric_limits<double>::lowest();
421 eminmax.first = std::min(eminmax.first, this->get<T>(ik)->band_energy(j, ispn));
422 eminmax.second = std::max(eminmax.second, this->get<T>(ik)->band_energy(j, ispn));
429 std::sort(eband.begin(), eband.end());
436 if (eband[ist].first > eband[ist - 1].second) {
437 band_gap_ = eband[ist].first - eband[ist - 1].second;
443void K_point_set::find_band_occupancies<double>();
444#if defined(SIRIUS_USE_FP32)
446void K_point_set::find_band_occupancies<float>();
457 auto const& kp = this->get<T>(it.i);
459 #pragma omp parallel for reduction(+:tmp)
474 if (
ctx_.cfg().parameters().precision_wf() ==
"fp32") {
475#if defined(SIRIUS_USE_FP32)
476 return this->valence_eval_sum<float>();
478 RTE_THROW(
"not compiled with FP32 support");
482 return this->valence_eval_sum<double>();
491 double ne_target =
ctx_.unit_cell().num_valence_electrons() -
ctx_.cfg().parameters().extra_charge();
500 auto f = smearing::entropy(
ctx_.smearing(),
ctx_.smearing_width());
505 auto const& kp = this->get<T>(it.i);
507 #pragma omp parallel for reduction(+:tmp)
513 s_sum += kp->weight() * tmp;
522 if (
ctx_.cfg().parameters().precision_wf() ==
"fp32") {
523#if defined(SIRIUS_USE_FP32)
524 return this->entropy_sum<float>();
526 RTE_THROW(
"not compiled with FP32 support");
530 return this->entropy_sum<double>();
540 pout <<
"total number of k-points : " <<
num_kpoints() << std::endl;
541 pout <<
hbar(80,
'-') << std::endl;
543 pout <<
" ik vk weight num_gkvec";
544 if (
ctx_.full_potential()) {
545 pout <<
" gklo_basis_size";
547 pout << std::endl <<
hbar(80,
'-') << std::endl;
550 for (
auto it : spl_num_kpoints()) {
552 pout << std::setw(4) << ik <<
ffmt(9, 4) <<
kpoints_[ik]->vk()[0] <<
ffmt(9, 4)
556 if (
ctx_.full_potential()) {
557 pout << std::setw(18) <<
kpoints_[ik]->gklo_basis_size();
561 RTE_OUT(
ctx_.
out()) << pout.flush(0);
568 HDF5_tree(name__, hdf5_access_t::truncate);
570 HDF5_tree fout(name__, hdf5_access_t::read_write);
578 this->get<double>(ik)->save(name__, ik);
588 RTE_THROW(
"not implemented");
Interface to the HDF5 library.
HDF5_tree create_node(int idx)
Create node by integer index.
void write(const std::string &name, T const *data, std::vector< int > const &dims)
Write a multidimensional array.
int num_kpoints() const
Return total number of k-points.
double entropy_sum() const
Return entropy contribution from smearing store in Kpoint<T>.
double energy_fermi_
Fermi energy which is searched in find_band_occupancies().
void add_kpoint(r3::vector< double > vk__, double weight__)
Add k-point to the set.
std::vector< std::unique_ptr< K_point< double > > > kpoints_
List of k-points.
void initialize(std::vector< int > const &counts={})
Initialize the k-point set.
Simulation_context & ctx_
Context of a simulation.
double valence_eval_sum() const
Return sum of valence eigen-values store in Kpoint<T>.
void sync_band()
Sync band energies or occupancies between all MPI ranks.
void print_info()
Print basic info to the standard output.
void find_band_occupancies()
Find Fermi energy and band occupation numbers.
void create_k_mesh(r3::vector< int > k_grid__, r3::vector< int > k_shift__, int use_symmetry__)
Create regular grid of k-points.
double band_gap_
Band gap found by find_band_occupancies().
void save(std::string const &name__) const
Save k-point set to HDF5 file.
splindex_chunk< kp_index_t > spl_num_kpoints_
Split index of k-points.
auto const & comm_band() const
Band parallelization communicator.
auto const & comm_k() const
Communicator between k-points.
std::ostream & out() const
Return output stream.
mpi::Communicator const & comm() const
Total communicator of the simulation.
int num_spins() const
Number of spin components.
int num_spinors() const
Number of spinor wave-functions labeled by a sinlge band index.
int max_occupancy() const
Maximum band occupancy.
int num_bands(int num_bands__)
Set the number of bands.
int num_mag_dims() const
Number of dimensions in the magnetization vector.
Floating-point formatting (precision and width).
void allreduce(T *buffer__, int count__) const
Perform the in-place (the output buffer is used as the input buffer) all-to-all reduction.
int rank() const
Rank of MPI process inside communicator.
Parallel standard output.
Simple implementation of 3d vector.
void zero(memory_t mem__, size_t idx0__, size_t n__)
Zero n elements starting from idx0.
size_t size() const
Return total size (number of elements) of the array.
value_type local_size(block_id block_id__) const
Return local size of the split index for a given block.
Index_t::global global_index(typename Index_t::local idxloc__, block_id block_id__) const
Return global index of an element by local index and block id.
Externally defined block distribution.
Find the irriducible k-points of the Brillouin zone.
Contains definition of sirius::K_point class.
Contains declaration and partial implementation of sirius::K_point_set class.
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
Namespace of the SIRIUS library.
double eval_sum(Unit_cell const &unit_cell, K_point_set const &kset)
TODO doc.
strong_type< int, struct __block_id_tag > block_id
ID of the block.
strong_type< int, struct __n_blocks_tag > n_blocks
Number of blocks to which the global index is split.
auto newton_minimization_chemical_potential(Nt &&N, DNt &&dN, D2Nt &&ddN, double mu0, double ne, double tol, int maxstep=1000)
bool file_exists(std::string file_name)
Check if file exists.
Smearing functions used in finding the band occupancies.