SIRIUS 7.5.0
Electronic structure library and applications
k_point.cpp
Go to the documentation of this file.
1// Copyright (c) 2013-2018 Anton Kozhevnikov, Mathieu Taillefumier, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file k_point.cpp
21 *
22 * \brief Contains partial implementation of sirius::K_point class.
23 */
24
25#include "k_point/k_point.hpp"
28
29namespace sirius {
30
31template <typename T>
32void
34{
35 PROFILE("sirius::K_point::initialize");
36
37 zil_.resize(ctx_.unit_cell().lmax_apw() + 1);
38 for (int l = 0; l <= ctx_.unit_cell().lmax_apw(); l++) {
39 zil_[l] = std::pow(std::complex<T>(0, 1), l);
40 }
41
42 l_by_lm_ = sf::l_by_lm(ctx_.unit_cell().lmax_apw());
43
44 int bs = ctx_.cyclic_block_size();
45
46 /* In case of collinear magnetism we store only non-zero spinor components.
47 *
48 * non magnetic case:
49 * .---.
50 * | |
51 * .---.
52 *
53 * collinear case:
54 * .---. .---.
55 * |uu | 0 |uu |
56 * .---.---. -> .---.
57 * 0 |dd | |dd |
58 * .---. .---.
59 *
60 * non collinear case:
61 * .-------.
62 * | |
63 * .-------.
64 * | |
65 * .-------.
66 */
67 int nst = ctx_.num_bands();
68
69 auto mem_type_evp = ctx_.std_evp_solver().host_memory_t();
70 auto mem_type_gevp = ctx_.gen_evp_solver().host_memory_t();
71
72 /* build a full list of G+k vectors for all MPI ranks */
73 generate_gkvec(ctx_.gk_cutoff());
74 /* build a list of basis functions */
75 generate_gklo_basis();
76
77 if (ctx_.full_potential()) {
78 if (ctx_.cfg().control().use_second_variation()) {
79
80 RTE_ASSERT(ctx_.num_fv_states() > 0);
81 fv_eigen_values_ = sddk::mdarray<double, 1>(ctx_.num_fv_states(), sddk::memory_t::host, "fv_eigen_values");
82
83 if (ctx_.need_sv()) {
84 /* in case of collinear magnetism store pure up and pure dn components, otherwise store the full matrix
85 */
86 for (int is = 0; is < ctx_.num_spinors(); is++) {
87 sv_eigen_vectors_[is] = la::dmatrix<std::complex<T>>(nst, nst, ctx_.blacs_grid(), bs, bs, mem_type_evp);
88 }
89 }
90
91 std::vector<int> num_mt_coeffs(unit_cell_.num_atoms());
92 for (int ia = 0; ia < unit_cell_.num_atoms(); ia++) {
93 num_mt_coeffs[ia] = unit_cell_.atom(ia).mt_lo_basis_size();
94 }
95
96 /* allocate fv eien vectors */
97 fv_eigen_vectors_slab_ = std::make_unique<wf::Wave_functions<T>>(
98 gkvec_, num_mt_coeffs, wf::num_mag_dims(0), wf::num_bands(ctx_.num_fv_states()),
99 ctx_.host_memory_t());
100
101 fv_eigen_vectors_slab_->zero(sddk::memory_t::host, wf::spin_index(0),
102 wf::band_range(0, ctx_.num_fv_states()));
103 for (int i = 0; i < ctx_.num_fv_states(); i++) {
104 for (int igloc = 0; igloc < gkvec().gvec_count(comm().rank()); igloc++) {
105 int ig = igloc + gkvec().gvec_offset(comm().rank());
106 if (ig == i) {
107 fv_eigen_vectors_slab_->pw_coeffs(igloc, wf::spin_index(0), wf::band_index(i)) = 1.0;
108 }
109 if (ig == i + 1) {
110 fv_eigen_vectors_slab_->pw_coeffs(igloc, wf::spin_index(0), wf::band_index(i)) = 0.5;
111 }
112 if (ig == i + 2) {
113 fv_eigen_vectors_slab_->pw_coeffs(igloc, wf::spin_index(0), wf::band_index(i)) = 0.125;
114 }
115 }
116 }
117 if (ctx_.cfg().iterative_solver().type() == "exact") {
118 /* ELPA needs a full matrix of eigen-vectors as it uses it as a work space */
119 if (ctx_.gen_evp_solver().type() == la::ev_solver_t::elpa || ctx_.gen_evp_solver().type() == la::ev_solver_t::dlaf) {
120 fv_eigen_vectors_ = la::dmatrix<std::complex<T>>(gklo_basis_size(), gklo_basis_size(),
121 ctx_.blacs_grid(), bs, bs, mem_type_gevp);
122 } else {
123 fv_eigen_vectors_ = la::dmatrix<std::complex<T>>(gklo_basis_size(), ctx_.num_fv_states(),
124 ctx_.blacs_grid(), bs, bs, mem_type_gevp);
125 }
126 } else {
127 int ncomp = ctx_.cfg().iterative_solver().num_singular();
128 if (ncomp < 0) {
129 ncomp = ctx_.num_fv_states() / 2;
130 }
131
132 singular_components_ = std::make_unique<wf::Wave_functions<T>>(
133 gkvec_, num_mt_coeffs, wf::num_mag_dims(0), wf::num_bands(ncomp), ctx_.host_memory_t());
134
135 singular_components_->zero(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, ncomp));
136 /* starting guess for wave-functions */
137 for (int i = 0; i < ncomp; i++) {
138 for (int igloc = 0; igloc < gkvec().count(); igloc++) {
139 int ig = igloc + gkvec().offset();
140 if (ig == i) {
141 singular_components_->pw_coeffs(igloc, wf::spin_index(0), wf::band_index(i)) = 1.0;
142 }
143 if (ig == i + 1) {
144 singular_components_->pw_coeffs(igloc, wf::spin_index(0), wf::band_index(i)) = 0.5;
145 }
146 if (ig == i + 2) {
147 singular_components_->pw_coeffs(igloc, wf::spin_index(0), wf::band_index(i)) = 0.125;
148 }
149 }
150 }
151 }
152
153 for (int ia = 0; ia < unit_cell_.num_atoms(); ia++) {
154 num_mt_coeffs[ia] = unit_cell_.atom(ia).mt_basis_size();
155 }
156 fv_states_ = std::make_unique<wf::Wave_functions<T>>(gkvec_, num_mt_coeffs, wf::num_mag_dims(0),
157 wf::num_bands(ctx_.num_fv_states()), ctx_.host_memory_t());
158
159 spinor_wave_functions_ = std::make_unique<wf::Wave_functions<T>>(gkvec_, num_mt_coeffs,
160 wf::num_mag_dims(ctx_.num_mag_dims()), wf::num_bands(nst), ctx_.host_memory_t());
161 } else {
162 RTE_THROW("not implemented");
163 }
164 } else {
165 spinor_wave_functions_ = std::make_unique<wf::Wave_functions<T>>(gkvec_, wf::num_mag_dims(ctx_.num_mag_dims()),
166 wf::num_bands(nst), ctx_.host_memory_t());
167
168 if (ctx_.hubbard_correction()) {
169 /* allocate Hubbard wave-functions */
170 int nwfh = unit_cell_.num_hubbard_wf().first;
171 int nwf = unit_cell_.num_ps_atomic_wf().first;
172
173 hubbard_wave_functions_ = std::make_unique<wf::Wave_functions<T>>(gkvec_, wf::num_mag_dims(0),
174 wf::num_bands(nwfh), ctx_.host_memory_t());
175 hubbard_wave_functions_S_ = std::make_unique<wf::Wave_functions<T>>(gkvec_, wf::num_mag_dims(0),
176 wf::num_bands(nwfh), ctx_.host_memory_t());
177 atomic_wave_functions_ = std::make_unique<wf::Wave_functions<T>>(gkvec_, wf::num_mag_dims(0),
178 wf::num_bands(nwf), ctx_.host_memory_t());
179 atomic_wave_functions_S_ = std::make_unique<wf::Wave_functions<T>>(gkvec_, wf::num_mag_dims(0),
180 wf::num_bands(nwf), ctx_.host_memory_t());
181 }
182 }
183
184 update();
185}
186
187template <typename T>
188void
190{
191 PROFILE("sirius::K_point::generate_hubbard_orbitals");
192
193 if (ctx_.so_correction()) {
194 RTE_THROW("Hubbard+SO is not implemented");
195 }
196 if (ctx_.gamma_point()) {
197 RTE_THROW("Hubbard+Gamma point is not implemented");
198 }
199
200 auto num_ps_atomic_wf = unit_cell_.num_ps_atomic_wf();
201 int nwf = num_ps_atomic_wf.first;
202
203 /* generate the initial atomic wavefunctions (full set composed of all atoms wfs) */
204 std::vector<int> atoms(ctx_.unit_cell().num_atoms());
205 std::iota(atoms.begin(), atoms.end(), 0);
206
207 this->generate_atomic_wave_functions(atoms, [&](int iat){ return &ctx_.unit_cell().atom_type(iat).indexb_wfs(); },
208 *ctx_.ri().ps_atomic_wf_, *atomic_wave_functions_);
209
210 auto pcs = env::print_checksum();
211 if (pcs) {
212 auto cs = atomic_wave_functions_->checksum(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, nwf));
213 if (this->comm().rank() == 0) {
214 print_checksum("atomic_wave_functions", cs, RTE_OUT(std::cout));
215 }
216 }
217
218 /* check if we have a norm conserving pseudo potential only */
219 auto q_op = (unit_cell_.augment()) ? std::make_unique<Q_operator<T>>(ctx_) : nullptr;
220
221 auto mem = ctx_.processing_unit_memory_t();
222
223 std::unique_ptr<wf::Wave_functions<T>> wf_tmp;
224 std::unique_ptr<wf::Wave_functions<T>> swf_tmp;
225
226 {
227 auto mg1 = atomic_wave_functions_->memory_guard(mem, wf::copy_to::device | wf::copy_to::host);
228 auto mg2 = atomic_wave_functions_S_->memory_guard(mem, wf::copy_to::host);
229
230 /* compute S|phi> */
231 auto bp_gen = beta_projectors().make_generator();
232 auto bp_coeffs = bp_gen.prepare();
233
234 sirius::apply_S_operator<T, std::complex<T>>(mem, wf::spin_range(0), wf::band_range(0, nwf), bp_gen, bp_coeffs,
235 *atomic_wave_functions_, q_op.get(), *atomic_wave_functions_S_);
236
237 if (ctx_.cfg().hubbard().full_orthogonalization()) {
238 /* save phi and sphi */
239
240 wf_tmp = std::make_unique<wf::Wave_functions<T>>(gkvec_, wf::num_mag_dims(0), wf::num_bands(nwf),
241 ctx_.host_memory_t());
242 swf_tmp = std::make_unique<wf::Wave_functions<T>>(gkvec_, wf::num_mag_dims(0), wf::num_bands(nwf),
243 ctx_.host_memory_t());
244
245 auto mg3 = wf_tmp->memory_guard(mem, wf::copy_to::host);
246 auto mg4 = swf_tmp->memory_guard(mem, wf::copy_to::host);
247
248 wf::copy(mem, *atomic_wave_functions_, wf::spin_index(0), wf::band_range(0, nwf),
249 *wf_tmp, wf::spin_index(0), wf::band_range(0, nwf));
250 wf::copy(mem, *atomic_wave_functions_S_, wf::spin_index(0), wf::band_range(0, nwf),
251 *swf_tmp, wf::spin_index(0), wf::band_range(0, nwf));
252
253 int BS = ctx_.cyclic_block_size();
254 la::dmatrix<std::complex<T>> ovlp(nwf, nwf, ctx_.blacs_grid(), BS, BS);
255
256 wf::inner(ctx_.spla_context(), mem, wf::spin_range(0), *atomic_wave_functions_,
257 wf::band_range(0, nwf), *atomic_wave_functions_S_, wf::band_range(0, nwf), ovlp, 0, 0);
258 auto B = std::get<0>(inverse_sqrt(ovlp, nwf));
260 /* use sphi as temporary */
261 wf::transform(ctx_.spla_context(), mem, *B, 0, 0, 1.0, *atomic_wave_functions_,
262 wf::spin_index(0), wf::band_range(0, nwf), 0.0, *atomic_wave_functions_S_, wf::spin_index(0),
263 wf::band_range(0, nwf));
264
265 wf::copy(mem, *atomic_wave_functions_S_, wf::spin_index(0), wf::band_range(0, nwf),
266 *atomic_wave_functions_, wf::spin_index(0), wf::band_range(0, nwf));
267
268 apply_S_operator<T, std::complex<T>>(mem, wf::spin_range(0), wf::band_range(0, nwf), bp_gen, bp_coeffs,
269 *atomic_wave_functions_, q_op.get(), *atomic_wave_functions_S_);
270
271 //if (ctx_.cfg().control().verification() >= 1) {
272 // sddk::inner(ctx_.spla_context(), sddk::spin_range(0), phi, 0, nwf, sphi, 0, nwf, ovlp, 0, 0);
273
274 // auto diff = check_identity(ovlp, nwf);
275 // RTE_OUT(std::cout) << "orthogonalization error " << diff << std::endl;
276 //}
277 }
278
279 // beta_projectors().dismiss();
280 }
281
282 if (pcs) {
283 auto cs = atomic_wave_functions_S_->checksum(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, nwf));
284 if (this->comm().rank() == 0) {
285 print_checksum("atomic_wave_functions_S", cs, RTE_OUT(std::cout));
286 }
287 }
288
289 auto num_hubbard_wf = unit_cell_.num_hubbard_wf();
290
291 for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
292 auto& atom = ctx_.unit_cell().atom(ia);
293 auto& type = atom.type();
294 if (type.hubbard_correction()) {
295 /* loop over Hubbard orbitals of the atom */
296 for (auto e : type.indexr_hub()) {
297 /* Hubbard orbital descriptor */
298 auto& hd = type.lo_descriptor_hub(e.idxrf);
299 int l = e.am.l();
300 int mmax = 2 * l + 1;
301
302 int idxr_wf = hd.idx_wf();
303
304 int offset_in_wf = num_ps_atomic_wf.second[ia] + type.indexb_wfs().index_of(rf_index(idxr_wf));
305 int offset_in_hwf = num_hubbard_wf.second[ia] + type.indexb_hub().index_of(e.idxrf);
306
307 wf::copy(sddk::memory_t::host, *atomic_wave_functions_, wf::spin_index(0),
308 wf::band_range(offset_in_wf, offset_in_wf + mmax), *hubbard_wave_functions_,
309 wf::spin_index(0), wf::band_range(offset_in_hwf, offset_in_hwf + mmax));
310
311 wf::copy(sddk::memory_t::host, *atomic_wave_functions_S_, wf::spin_index(0),
312 wf::band_range(offset_in_wf, offset_in_wf + mmax), *hubbard_wave_functions_S_,
313 wf::spin_index(0), wf::band_range(offset_in_hwf, offset_in_hwf + mmax));
314 }
315 }
316 }
317 /* restore phi and sphi */
318 if (ctx_.cfg().hubbard().full_orthogonalization()) {
319
320 wf::copy(sddk::memory_t::host, *wf_tmp, wf::spin_index(0), wf::band_range(0, nwf),
321 *atomic_wave_functions_, wf::spin_index(0), wf::band_range(0, nwf));
322 wf::copy(sddk::memory_t::host, *swf_tmp, wf::spin_index(0), wf::band_range(0, nwf),
323 *atomic_wave_functions_S_, wf::spin_index(0), wf::band_range(0, nwf));
324 }
325
326 if (pcs) {
327 auto cs1 = hubbard_wave_functions_->checksum(sddk::memory_t::host, wf::spin_index(0),
328 wf::band_range(0, num_hubbard_wf.first));
329 auto cs2 = hubbard_wave_functions_S_->checksum(sddk::memory_t::host, wf::spin_index(0),
330 wf::band_range(0, num_hubbard_wf.first));
331 if (comm().rank() == 0) {
332 print_checksum("hubbard_wave_functions", cs1, RTE_OUT(std::cout));
333 print_checksum("hubbard_wave_functions_S", cs2, RTE_OUT(std::cout));
334 }
335 }
336}
337
338template <typename T>
339void
340K_point<T>::generate_gkvec(double gk_cutoff__)
341{
342 PROFILE("sirius::K_point::generate_gkvec");
343
344 gkvec_partition_ = std::make_shared<fft::Gvec_fft>(
345 this->gkvec(), ctx_.comm_fft_coarse(), ctx_.comm_band_ortho_fft_coarse());
346
347 const auto fft_type = gkvec_->reduced() ? SPFFT_TRANS_R2C : SPFFT_TRANS_C2C;
348 const auto spfft_pu = ctx_.processing_unit() == sddk::device_t::CPU ? SPFFT_PU_HOST : SPFFT_PU_GPU;
349 auto const& gv = gkvec_partition_->gvec_array();
350 /* create transformation */
351 spfft_transform_.reset(new fft::spfft_transform_type<T>(ctx_.spfft_grid_coarse<T>().create_transform(
352 spfft_pu, fft_type, ctx_.fft_coarse_grid()[0], ctx_.fft_coarse_grid()[1], ctx_.fft_coarse_grid()[2],
353 ctx_.spfft_coarse<double>().local_z_length(), gkvec_partition_->count(), SPFFT_INDEX_TRIPLETS,
354 gv.at(sddk::memory_t::host))));
355
356 splindex_block_cyclic<> spl_ngk_row(num_gkvec(), n_blocks(num_ranks_row_), block_id(rank_row_),
357 ctx_.cyclic_block_size());
358 num_gkvec_row_ = spl_ngk_row.local_size();
359 sddk::mdarray<int, 2> gkvec_row(3, num_gkvec_row_);
360
361 splindex_block_cyclic<> spl_ngk_col(num_gkvec(), n_blocks(num_ranks_col_), block_id(rank_col_),
362 ctx_.cyclic_block_size());
363 num_gkvec_col_ = spl_ngk_col.local_size();
364 sddk::mdarray<int, 2> gkvec_col(3, num_gkvec_col_);
365
366 for (int rank = 0; rank < comm().size(); rank++) {
367 auto gv = gkvec_->gvec_local(rank);
368 for (int igloc = 0; igloc < gkvec_->gvec_count(rank); igloc++) {
369 int ig = gkvec_->gvec_offset(rank) + igloc;
370 auto loc_row = spl_ngk_row.location(ig);
371 auto loc_col = spl_ngk_col.location(ig);
372 if (loc_row.ib == comm_row().rank()) {
373 for (int x : {0, 1, 2}) {
374 gkvec_row(x, loc_row.index_local) = gv(x, igloc);
375 }
376 }
377 if (loc_col.ib == comm_col().rank()) {
378 for (int x : {0, 1, 2}) {
379 gkvec_col(x, loc_col.index_local) = gv(x, igloc);
380 }
381 }
382 }
383 }
384 gkvec_row_ = std::make_shared<fft::Gvec>(vk_, unit_cell_.reciprocal_lattice_vectors(), num_gkvec_row_,
385 &gkvec_row(0, 0), comm_row(), ctx_.gamma_point());
386
387 gkvec_col_ = std::make_shared<fft::Gvec>(vk_, unit_cell_.reciprocal_lattice_vectors(), num_gkvec_col_,
388 &gkvec_col(0, 0), comm_col(), ctx_.gamma_point());
389}
390
391template <typename T>
392void
394{
395 PROFILE("sirius::K_point::update");
396
397 gkvec_->lattice_vectors(ctx_.unit_cell().reciprocal_lattice_vectors());
398 gkvec_partition_->update_gkvec_cart();
399
400 if (ctx_.full_potential()) {
401 if (ctx_.cfg().iterative_solver().type() == "exact") {
402 alm_coeffs_row_ = std::make_unique<Matching_coefficients>(unit_cell_, *gkvec_row_);
403 alm_coeffs_col_ = std::make_unique<Matching_coefficients>(unit_cell_, *gkvec_col_);
404 }
405 alm_coeffs_loc_ = std::make_unique<Matching_coefficients>(unit_cell_, gkvec());
406 }
407
408 if (!ctx_.full_potential()) {
409 /* compute |beta> projectors for atom types */
410 beta_projectors_ = std::make_unique<Beta_projectors<T>>(ctx_, gkvec());
411
412 if (ctx_.cfg().iterative_solver().type() == "exact") {
413 beta_projectors_row_ = std::make_unique<Beta_projectors<T>>(ctx_, *gkvec_row_);
414 beta_projectors_col_ = std::make_unique<Beta_projectors<T>>(ctx_, *gkvec_col_);
415 }
416
417 if (ctx_.hubbard_correction()) {
418 generate_hubbard_orbitals();
419 }
420 }
421}
422
423template <typename T>
424void
425K_point<T>::get_fv_eigen_vectors(sddk::mdarray<std::complex<T>, 2>& fv_evec__) const
426{
427 RTE_ASSERT((int)fv_evec__.size(0) >= gklo_basis_size());
428 RTE_ASSERT((int)fv_evec__.size(1) == ctx_.num_fv_states());
429 RTE_ASSERT(gklo_basis_size_row() == fv_eigen_vectors_.num_rows_local());
430
431 sddk::mdarray<std::complex<T>, 1> tmp(gklo_basis_size_row());
432
433 /* zero global array */
434 fv_evec__.zero();
435
436 try {
437 for (int ist = 0; ist < ctx_.num_fv_states(); ist++) {
438 for (int igloc = 0; igloc < this->num_gkvec_loc(); igloc++) {
439 int ig = this->gkvec().offset() + igloc;
440 fv_evec__(ig, ist) = fv_eigen_vectors_slab_->pw_coeffs(igloc, wf::spin_index(0), wf::band_index(ist));
441 }
442 this->comm().allgather(fv_evec__.at(sddk::memory_t::host, 0, ist), this->gkvec().count(),
443 this->gkvec().offset());
444 }
445 } catch(std::exception const& e) {
446 std::stringstream s;
447 s << e.what() << std::endl;
448 s << "Error in getting plane-wave coefficients";
449 RTE_THROW(s);
450 }
451
452 try {
453 for (int ist = 0; ist < ctx_.num_fv_states(); ist++) {
454 /* offset in the global index of local orbitals */
455 int offs{0};
456 for (int ia = 0; ia < ctx_.unit_cell().num_atoms(); ia++) {
457 /* number of atom local orbitals */
458 int nlo = ctx_.unit_cell().atom(ia).mt_lo_basis_size();
459 auto loc = fv_eigen_vectors_slab_->spl_num_atoms().location(typename atom_index_t::global(ia));
460 if (loc.ib == this->comm().rank()) {
461 for (int xi = 0; xi < nlo; xi++) {
462 fv_evec__(this->num_gkvec() + offs + xi, ist) =
463 fv_eigen_vectors_slab_->mt_coeffs(xi, atom_index_t::local(loc.index_local), wf::spin_index(0),
464 wf::band_index(ist));
465 }
466 }
467 offs += nlo;
468 }
469 auto& mtd = fv_eigen_vectors_slab_->mt_coeffs_distr();
470 this->comm().allgather(fv_evec__.at(sddk::memory_t::host, this->num_gkvec(), ist),
471 mtd.counts.data(), mtd.offsets.data());
472 }
473 } catch(std::exception const& e) {
474 std::stringstream s;
475 s << e.what() << std::endl;
476 s << "Error in getting muffin-tin coefficients";
477 RTE_THROW(s);
478 }
479}
480
481//== void K_point::check_alm(int num_gkvec_loc, int ia, mdarray<double_complex, 2>& alm)
482//== {
483//== static SHT* sht = NULL;
484//== if (!sht) sht = new SHT(ctx_.lmax_apw());
485//==
486//== Atom* atom = unit_cell_.atom(ia);
487//== Atom_type* type = atom->type();
488//==
489//== mdarray<double_complex, 2> z1(sht->num_points(), type->mt_aw_basis_size());
490//== for (int i = 0; i < type->mt_aw_basis_size(); i++)
491//== {
492//== int lm = type->indexb(i).lm;
493//== int idxrf = type->indexb(i).idxrf;
494//== double rf = atom->symmetry_class()->radial_function(atom->num_mt_points() - 1, idxrf);
495//== for (int itp = 0; itp < sht->num_points(); itp++)
496//== {
497//== z1(itp, i) = sht->ylm_backward(lm, itp) * rf;
498//== }
499//== }
500//==
501//== mdarray<double_complex, 2> z2(sht->num_points(), num_gkvec_loc);
502//== blas<CPU>::gemm(0, 2, sht->num_points(), num_gkvec_loc, type->mt_aw_basis_size(), z1.ptr(), z1.ld(),
503//== alm.ptr(), alm.ld(), z2.ptr(), z2.ld());
504//==
505//== vector3d<double> vc = unit_cell_.get_cartesian_coordinates(unit_cell_.atom(ia)->position());
506//==
507//== double tdiff = 0;
508//== for (int igloc = 0; igloc < num_gkvec_loc; igloc++)
509//== {
510//== vector3d<double> gkc = gkvec_cart(igkglob(igloc));
511//== for (int itp = 0; itp < sht->num_points(); itp++)
512//== {
513//== double_complex aw_value = z2(itp, igloc);
514//== vector3d<double> r;
515//== for (int x = 0; x < 3; x++) r[x] = vc[x] + sht->coord(x, itp) * type->mt_radius();
516//== double_complex pw_value = exp(double_complex(0, Utils::scalar_product(r, gkc))) /
517//sqrt(unit_cell_.omega());
518//== tdiff += abs(pw_value - aw_value);
519//== }
520//== }
521//==
522//== printf("atom : %i absolute alm error : %e average alm error : %e\n",
523//== ia, tdiff, tdiff / (num_gkvec_loc * sht->num_points()));
524//== }
525
526/** The following HDF5 data structure is created:
527 \verbatim
528 /K_point_set/ik/vk
529 /K_point_set/ik/band_energies
530 /K_point_set/ik/band_occupancies
531 /K_point_set/ik/gkvec
532 /K_point_set/ik/gvec
533 /K_point_set/ik/bands/ibnd/spinor_wave_function/ispn/pw
534 /K_point_set/ik/bands/ibnd/spinor_wave_function/ispn/mt
535 \endverbatim
536*/
537template <typename T>
538void
539K_point<T>::save(std::string const& name__, int id__) const
540{
541 /* rank 0 creates placeholders in the HDF5 file */
542 if (comm().rank() == 0) {
543 /* open file with write access */
544 HDF5_tree fout(name__, hdf5_access_t::read_write);
545 /* create /K_point_set/ik */
546 fout["K_point_set"].create_node(id__);
547 fout["K_point_set"][id__].write("vk", &vk_[0], 3);
548 fout["K_point_set"][id__].write("band_energies", band_energies_);
549 fout["K_point_set"][id__].write("band_occupancies", band_occupancies_);
550
551 /* save the entire G+k object */
552 //TODO: only the list of z-columns is probably needed to recreate the G+k vectors
553 //serializer s;
554 //gkvec().pack(s);
555 //fout["K_point_set"][id__].write("gkvec", s.stream());
556
557 /* save the order of G-vectors */
558 sddk::mdarray<int, 2> gv(3, num_gkvec());
559 for (int i = 0; i < num_gkvec(); i++) {
560 auto v = gkvec().template gvec<index_domain_t::global>(i);
561 for (int x : {0, 1, 2}) {
562 gv(x, i) = v[x];
563 }
564 }
565 fout["K_point_set"][id__].write("gvec", gv);
566 fout["K_point_set"][id__].create_node("bands");
567 for (int i = 0; i < ctx_.num_bands(); i++) {
568 fout["K_point_set"][id__]["bands"].create_node(i);
569 fout["K_point_set"][id__]["bands"][i].create_node("spinor_wave_function");
570 for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
571 fout["K_point_set"][id__]["bands"][i]["spinor_wave_function"].create_node(ispn);
572 }
573 }
574 }
575 /* wait for rank 0 */
576 comm().barrier();
577 //int gkvec_count = gkvec().count();
578 //int gkvec_offset = gkvec().offset();
579 //std::vector<std::complex<T>> wf_tmp(num_gkvec());
580
581 //std::unique_ptr<sddk::HDF5_tree> fout;
582 /* rank 0 opens a file */
583 //if (comm().rank() == 0) {
584 // fout = std::make_unique<sddk::HDF5_tree>(name__, sddk::hdf5_access_t::read_write);
585 //}
586
587 RTE_THROW("re-implement");
588
589 ///* store wave-functions */
590 //for (int i = 0; i < ctx_.num_bands(); i++) {
591 // for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
592 // /* gather full column of PW coefficients on rank 0 */
593 // comm().gather(&spinor_wave_functions_->pw_coeffs(ispn).prime(0, i), wf_tmp.data(), gkvec_offset,
594 // gkvec_count, 0);
595 // if (comm().rank() == 0) {
596 // (*fout)["K_point_set"][id__]["bands"][i]["spinor_wave_function"][ispn].write("pw", wf_tmp);
597 // }
598 // }
599 // comm().barrier();
600 //}
601}
602
603template <typename T>
604void
605K_point<T>::load(HDF5_tree h5in, int id)
606{
607 RTE_THROW("not implemented");
608 //== band_energies_.resize(ctx_.num_bands());
609 //== h5in[id].read("band_energies", band_energies_);
610
611 //== band_occupancies_.resize(ctx_.num_bands());
612 //== h5in[id].read("band_occupancies", band_occupancies_);
613 //==
614 //== h5in[id].read_mdarray("fv_eigen_vectors", fv_eigen_vectors_panel_);
615 //== h5in[id].read_mdarray("sv_eigen_vectors", sv_eigen_vectors_);
616}
617
618//== void K_point::save_wave_functions(int id)
619//== {
620//== if (ctx_.mpi_grid().root(1 << _dim_col_))
621//== {
622//== HDF5_tree fout(storage_file_name, false);
623//==
624//== fout["K_points"].create_node(id);
625//== fout["K_points"][id].write("coordinates", &vk_[0], 3);
626//== fout["K_points"][id].write("mtgk_size", mtgk_size());
627//== fout["K_points"][id].create_node("spinor_wave_functions");
628//== fout["K_points"][id].write("band_energies", &band_energies_[0], ctx_.num_bands());
629//== fout["K_points"][id].write("band_occupancies", &band_occupancies_[0], ctx_.num_bands());
630//== }
631//==
632//== Platform::barrier(ctx_.mpi_grid().communicator(1 << _dim_col_));
633//==
634//== mdarray<double_complex, 2> wfj(NULL, mtgk_size(), ctx_.num_spins());
635//== for (int j = 0; j < ctx_.num_bands(); j++)
636//== {
637//== int rank = ctx_.spl_spinor_wf_col().location(_splindex_rank_, j);
638//== int offs = ctx_.spl_spinor_wf_col().location(_splindex_offs_, j);
639//== if (ctx_.mpi_grid().coordinate(_dim_col_) == rank)
640//== {
641//== HDF5_tree fout(storage_file_name, false);
642//== wfj.set_ptr(&spinor_wave_functions_(0, 0, offs));
643//== fout["K_points"][id]["spinor_wave_functions"].write_mdarray(j, wfj);
644//== }
645//== Platform::barrier(ctx_.mpi_grid().communicator(_dim_col_));
646//== }
647//== }
648//==
649//== void K_point::load_wave_functions(int id)
650//== {
651//== HDF5_tree fin(storage_file_name, false);
652//==
653//== int mtgk_size_in;
654//== fin["K_points"][id].read("mtgk_size", &mtgk_size_in);
655//== if (mtgk_size_in != mtgk_size()) error_local(__FILE__, __LINE__, "wrong wave-function size");
656//==
657//== band_energies_.resize(ctx_.num_bands());
658//== fin["K_points"][id].read("band_energies", &band_energies_[0], ctx_.num_bands());
659//==
660//== band_occupancies_.resize(ctx_.num_bands());
661//== fin["K_points"][id].read("band_occupancies", &band_occupancies_[0], ctx_.num_bands());
662//==
663//== spinor_wave_functions_.set_dimensions(mtgk_size(), ctx_.num_spins(),
664//== ctx_.spl_spinor_wf_col().local_size());
665//== spinor_wave_functions_.allocate();
666//==
667//== mdarray<double_complex, 2> wfj(NULL, mtgk_size(), ctx_.num_spins());
668//== for (int jloc = 0; jloc < ctx_.spl_spinor_wf_col().local_size(); jloc++)
669//== {
670//== int j = ctx_.spl_spinor_wf_col(jloc);
671//== wfj.set_ptr(&spinor_wave_functions_(0, 0, jloc));
672//== fin["K_points"][id]["spinor_wave_functions"].read_mdarray(j, wfj);
673//== }
674//== }
675
676template <typename T>
677void
679 std::function<basis_functions_index const*(int)> indexb__,
681{
682 PROFILE("sirius::K_point::generate_atomic_wave_functions");
683
684 int lmax{3};
685 int lmmax = sf::lmmax(lmax);
686
687 /* compute offset for each atom */
688 std::vector<int> offset;
689 int n{0};
690 for (int ia : atoms__) {
691 offset.push_back(n);
692 int iat = unit_cell_.atom(ia).type_id();
693 n += indexb__(iat)->size();
694 }
695
696 /* allocate memory to store wave-functions for atom types */
697 std::vector<sddk::mdarray<std::complex<T>, 2>> wf_t(unit_cell_.num_atom_types());
698 for (int ia : atoms__) {
699 int iat = unit_cell_.atom(ia).type_id();
700 if (wf_t[iat].size() == 0) {
701 wf_t[iat] = sddk::mdarray<std::complex<T>, 2>(this->num_gkvec_loc(), indexb__(iat)->size(),
702 get_memory_pool(sddk::memory_t::host));
703 }
704 }
705
706 #pragma omp parallel for schedule(static)
707 for (int igk_loc = 0; igk_loc < this->num_gkvec_loc(); igk_loc++) {
708 /* vs = {r, theta, phi} */
709 auto vs = r3::spherical_coordinates(this->gkvec().template gkvec_cart<index_domain_t::local>(igk_loc));
710
711 /* compute real spherical harmonics for G+k vector */
712 std::vector<double> rlm(lmmax);
713 sf::spherical_harmonics(lmax, vs[1], vs[2], &rlm[0]);
714
715 /* get all values of the radial integrals for a given G+k vector */
716 std::vector<sddk::mdarray<double, 1>> ri_values(unit_cell_.num_atom_types());
717 for (int iat = 0; iat < unit_cell_.num_atom_types(); iat++) {
718 if (wf_t[iat].size() != 0) {
719 ri_values[iat] = ri__.values(iat, vs[0]);
720 }
721 }
722 for (int iat = 0; iat < unit_cell_.num_atom_types(); iat++) {
723 if (wf_t[iat].size() == 0) {
724 continue;
725 }
726 auto const& indexb = *indexb__(iat);
727 for (auto const& e: indexb) {
728 auto z = std::pow(std::complex<double>(0, -1), e.am.l()) * fourpi / std::sqrt(unit_cell_.omega());
729
730 wf_t[iat](igk_loc, e.xi) = static_cast<std::complex<T>>(z * rlm[e.lm] * ri_values[iat](e.idxrf));
731 }
732 }
733 }
734
735 for (int ia : atoms__) {
736
737 T phase = twopi * dot(gkvec().vk(), unit_cell_.atom(ia).position());
738 auto phase_k = std::exp(std::complex<T>(0.0, phase));
739
740 /* quickly compute phase factors without calling exp() function */
741 std::vector<std::complex<T>> phase_gk(num_gkvec_loc());
742 #pragma omp parallel for
743 for (int igk_loc = 0; igk_loc < num_gkvec_loc(); igk_loc++) {
744 auto G = gkvec().template gvec<index_domain_t::local>(igk_loc);
745 /* total phase e^{-i(G+k)r_{\alpha}} */
746 phase_gk[igk_loc] = std::conj(static_cast<std::complex<T>>(ctx_.gvec_phase_factor(G, ia)) * phase_k);
747 }
748
749 int iat = unit_cell_.atom(ia).type_id();
750 #pragma omp parallel
751 for (int xi = 0; xi < static_cast<int>(indexb__(iat)->size()); xi++) {
752 #pragma omp for nowait
753 for (int igk_loc = 0; igk_loc < num_gkvec_loc(); igk_loc++) {
754 wf__.pw_coeffs(igk_loc, wf::spin_index(0), wf::band_index(offset[ia] + xi)) =
755 wf_t[iat](igk_loc, xi) * phase_gk[igk_loc];
756 }
757 }
758 }
759}
760
761template <typename T>
762void
764{
765 /* find local number of row G+k vectors */
766 splindex_block_cyclic<> spl_ngk_row(num_gkvec(), n_blocks(num_ranks_row_), block_id(rank_row_),
767 ctx_.cyclic_block_size());
768 num_gkvec_row_ = spl_ngk_row.local_size();
769
770 /* find local number of column G+k vectors */
771 splindex_block_cyclic<> spl_ngk_col(num_gkvec(), n_blocks(num_ranks_col_), block_id(rank_col_),
772 ctx_.cyclic_block_size());
773 num_gkvec_col_ = spl_ngk_col.local_size();
774
775 if (ctx_.full_potential()) {
776 splindex_block_cyclic<> spl_nlo_row(num_gkvec() + unit_cell_.mt_lo_basis_size(),
777 n_blocks(num_ranks_row_), block_id(rank_row_), ctx_.cyclic_block_size());
778 splindex_block_cyclic<> spl_nlo_col(num_gkvec() + unit_cell_.mt_lo_basis_size(),
779 n_blocks(num_ranks_col_), block_id(rank_col_), ctx_.cyclic_block_size());
780
781 lo_basis_descriptor lo_desc;
782
783 int idx{0};
784 /* local orbital basis functions */
785 for (int ia = 0; ia < unit_cell_.num_atoms(); ia++) {
786 auto& atom = unit_cell_.atom(ia);
787 auto& type = atom.type();
788
789 int lo_index_offset = type.mt_aw_basis_size();
790
791 for (int j = 0; j < type.mt_lo_basis_size(); j++) {
792 int l = type.indexb(lo_index_offset + j).am.l();
793 int lm = type.indexb(lo_index_offset + j).lm;
794 int order = type.indexb(lo_index_offset + j).order;
795 int idxrf = type.indexb(lo_index_offset + j).idxrf;
796 lo_desc.ia = static_cast<uint16_t>(ia);
797 lo_desc.l = static_cast<uint8_t>(l);
798 lo_desc.lm = static_cast<uint16_t>(lm);
799 lo_desc.order = static_cast<uint8_t>(order);
800 lo_desc.idxrf = static_cast<uint8_t>(idxrf);
801
802 if (spl_nlo_row.location(num_gkvec() + idx).ib == rank_row_) {
803 lo_basis_descriptors_row_.push_back(lo_desc);
804 }
805 if (spl_nlo_col.location(num_gkvec() + idx).ib == rank_col_) {
806 lo_basis_descriptors_col_.push_back(lo_desc);
807 }
808
809 idx++;
810 }
811 }
812 RTE_ASSERT(idx == unit_cell_.mt_lo_basis_size());
813
814 atom_lo_cols_.clear();
815 atom_lo_cols_.resize(unit_cell_.num_atoms());
816 for (int i = 0; i < num_lo_col(); i++) {
817 int ia = lo_basis_descriptor_col(i).ia;
818 atom_lo_cols_[ia].push_back(i);
819 }
820
821 atom_lo_rows_.clear();
822 atom_lo_rows_.resize(unit_cell_.num_atoms());
823 for (int i = 0; i < num_lo_row(); i++) {
824 int ia = lo_basis_descriptor_row(i).ia;
825 atom_lo_rows_[ia].push_back(i);
826 }
827 }
828}
829
830template class K_point<double>;
831#ifdef SIRIUS_USE_FP32
832template class K_point<float>;
833#endif
834} // namespace sirius
Interface to the HDF5 library.
Definition: hdf5_tree.hpp:86
HDF5_tree create_node(int idx)
Create node by integer index.
Definition: hdf5_tree.hpp:424
void write(const std::string &name, T const *data, std::vector< int > const &dims)
Write a multidimensional array.
Definition: hdf5_tree.hpp:301
K-point related variables and methods.
Definition: k_point.hpp:44
void generate_atomic_wave_functions(std::vector< int > atoms__, std::function< basis_functions_index const *(int)> indexb__, Radial_integrals_atomic_wf< false > const &ri__, wf::Wave_functions< T > &wf__)
Generate plane-wave coefficients of the atomic wave-functions.
Definition: k_point.cpp:678
void update()
Update the reciprocal lattice vectors of the G+k array.
Definition: k_point.cpp:393
void generate_gkvec(double gk_cutoff__)
Find G+k vectors within the cutoff.
Definition: k_point.cpp:340
void get_fv_eigen_vectors(sddk::mdarray< std::complex< T >, 2 > &fv_evec__) const
Collect distributed first-variational vectors into a global array.
Definition: k_point.cpp:425
void initialize()
Initialize the k-point related arrays and data.
Definition: k_point.cpp:33
void generate_gklo_basis()
Generate G+k and local orbital basis sets.
Definition: k_point.cpp:763
void save(std::string const &name__, int id__) const
Save data to HDF5 file.
Definition: k_point.cpp:539
Radial integrals of the atomic centered orbitals.
Spline< double > const & values(int iwf__, int iat__) const
Retrieve a value for a given orbital of an atom type.
A helper class to establish various index mappings for the atomic basis functions.
Distributed matrix.
Definition: dmatrix.hpp:56
void zero(memory_t mem__, size_t idx0__, size_t n__)
Zero n elements starting from idx0.
Definition: memory.hpp:1316
splindex< Index_t >::location_t location(typename Index_t::global idx__) const
Return "local index, rank" pair for a global index.
Definition: splindex.hpp:409
value_type local_size(block_id block_id__) const
Return local size of the split index for a given block.
Definition: splindex.hpp:387
Wave-functions representation.
auto & pw_coeffs(int ig__, spin_index ispn__, band_index i__)
Return reference to the plane-wave coefficient for a given plane-wave, spin and band indices.
Describe a range of bands.
Describe a range of spins.
Compute inverse square root of the matrix.
Contains definition of sirius::K_point class.
auto inverse_sqrt(la::dmatrix< T > &A__, int N__)
Compute inverse square root of the matrix.
@ dlaf
DLA-Future solver.
auto spherical_coordinates(vector< double > vc)
Transform Cartesian coordinates [x,y,z] to spherical coordinates [r,theta,phi].
Definition: r3.hpp:590
int lmax(int lmmax__)
Get maximum orbital quantum number by the maximum lm index.
Definition: specfunc.hpp:56
int lmmax(int lmax)
Maximum number of combinations for a given .
Definition: specfunc.hpp:44
int lm(int l, int m)
Get composite lm index by angular index l and azimuthal index m.
Definition: specfunc.hpp:50
std::vector< int > l_by_lm(int lmax__)
Get array of orbital quantum numbers for each lm component.
Definition: specfunc.hpp:69
void spherical_harmonics(int lmax, double theta, double phi, std::complex< double > *ylm)
Optimized implementation of complex spherical harmonics.
Definition: specfunc.hpp:298
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > inner(::spla::Context &spla_ctx__, sddk::memory_t mem__, spin_range spins__, W const &wf_i__, band_range br_i__, Wave_functions< T > const &wf_j__, band_range br_j__, la::dmatrix< F > &result__, int irow0__, int jcol0__)
Compute inner product between the two sets of wave-functions.
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > transform(::spla::Context &spla_ctx__, sddk::memory_t mem__, la::dmatrix< F > const &M__, int irow0__, int jcol0__, real_type< F > alpha__, Wave_functions< T > const &wf_in__, spin_index s_in__, band_range br_in__, real_type< F > beta__, Wave_functions< T > &wf_out__, spin_index s_out__, band_range br_out__)
Apply linear transformation to the wave-functions.
void copy(sddk::memory_t mem__, Wave_functions< T > const &in__, wf::spin_index s_in__, wf::band_range br_in__, Wave_functions< F > &out__, wf::spin_index s_out__, wf::band_range br_out__)
Copy wave-functions.
Namespace of the SIRIUS library.
Definition: sirius.f90:5
strong_type< int, struct __block_id_tag > block_id
ID of the block.
Definition: splindex.hpp:108
strong_type< int, struct __rf_index_tag > rf_index
Radial function index.
const double twopi
Definition: constants.hpp:45
const double fourpi
Definition: constants.hpp:48
strong_type< int, struct __n_blocks_tag > n_blocks
Number of blocks to which the global index is split.
Definition: splindex.hpp:105
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
Definition: math_tools.hpp:165
Contains declaration of sirius::Non_local_operator class.
Descriptor of the local-orbital part of the LAPW+lo basis.
Definition: typedefs.hpp:186
uint8_t l
Index of orbital quantum number .
Definition: typedefs.hpp:191
uint8_t order
Order of the local orbital radial function for the given orbital quantum number .
Definition: typedefs.hpp:199
uint8_t idxrf
Index of the local orbital radial function.
Definition: typedefs.hpp:202
uint16_t ia
Index of atom.
Definition: typedefs.hpp:188
uint16_t lm
Combined lm index.
Definition: typedefs.hpp:194