SIRIUS 7.5.0
Electronic structure library and applications
occupation_matrix.cpp
// Copyright (c) 2013-2022 Mathieu Taillefumier, Anton Kozhevnikov, Thomas Schulthess
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
// the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
//    following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
//    and the following disclaimer in the documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

/** \file occupation_matrix.cpp
 *
 * \brief Occupation matrix of the LDA+U method.
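 *
 * \details The quantity accumulated here (see the inline comments in add_k_point_contribution()) is
 * \f[
 *     n^{\sigma \sigma'}_{m m'} = \sum_{n {\bf k}} f_{n {\bf k}}
 *     \langle \psi_{n {\bf k}} | \phi_{m}^{\sigma} \rangle
 *     \langle \phi_{m'}^{\sigma'} | \psi_{n {\bf k}} \rangle
 * \f]
 * where \f$ |\phi_m\rangle \f$ are the localized Hubbard orbitals and \f$ f_{n {\bf k}} \f$ are the
 * band occupancies.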
 */

#include "occupation_matrix.hpp"
#include "symmetry/crystal_symmetry.hpp"

namespace sirius {

Occupation_matrix::Occupation_matrix(Simulation_context& ctx__)
    : Hubbard_matrix(ctx__)
{
    if (!ctx_.hubbard_correction()) {
        return;
    }
    /* a pair of "total number, offsets" used for indexing the Hubbard orbitals */
    int nhwf = ctx_.unit_cell().num_hubbard_wf().first;

    /* find all possible translations */
    for (int i = 0; i < ctx_.cfg().hubbard().nonlocal().size(); i++) {
        auto nl = ctx_.cfg().hubbard().nonlocal(i);
        int ia  = nl.atom_pair()[0];
        int ja  = nl.atom_pair()[1];
        auto T  = nl.T();

        auto& sym = ctx_.unit_cell().symmetry();

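        /* each symmetry operation maps the atom pair (ia, ja) with translation T onto an
           equivalent pair; Ttot is the translation of that mapped pair. One occupation-matrix
           block is allocated per distinct translation, to be filled later in
           add_k_point_contribution() */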
        for (int isym = 0; isym < sym.size(); isym++) {
            auto Ttot = sym[isym].spg_op.inv_sym_atom_T[ja] - sym[isym].spg_op.inv_sym_atom_T[ia] +
                        dot(sym[isym].spg_op.invR, r3::vector<int>(T));
            if (!occ_mtrx_T_.count(Ttot)) {
                occ_mtrx_T_[Ttot] = sddk::mdarray<std::complex<double>, 3>(nhwf, nhwf, ctx_.num_mag_comp());
                occ_mtrx_T_[Ttot].zero();
            }
        }
    }
}

template <typename T>
void
Occupation_matrix::add_k_point_contribution(K_point<T>& kp__)
{
    if (!ctx_.hubbard_correction()) {
        return;
    }

    PROFILE("sirius::Occupation_matrix::add_k_point_contribution");

    sddk::memory_t mem_host{sddk::memory_t::host};
    sddk::memory_t mem{sddk::memory_t::host};
    la::lib_t la{la::lib_t::blas};
    if (ctx_.processing_unit() == sddk::device_t::GPU) {
        mem      = sddk::memory_t::device;
        mem_host = sddk::memory_t::host_pinned;
        la       = la::lib_t::gpublas;
    }

    /* a pair of "total number, offsets" used for indexing the Hubbard orbitals */
    auto r = ctx_.unit_cell().num_hubbard_wf();

    int nwfu = r.first;

    sddk::matrix<std::complex<T>> occ_mtrx(nwfu, nwfu, get_memory_pool(sddk::memory_t::host), "occ_mtrx");
    if (is_device_memory(mem)) {
        occ_mtrx.allocate(get_memory_pool(mem));
    }

    // TODO: the collinear and non-collinear cases share a lot of code; there should be a way to combine it

    /* full non-collinear magnetism */
    if (ctx_.num_mag_dims() == 3) {
        la::dmatrix<std::complex<T>> dm(kp__.num_occupied_bands(), nwfu, get_memory_pool(mem_host), "dm");
        if (is_device_memory(mem)) {
            dm.allocate(get_memory_pool(mem));
        }
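        /* compute dm(j, m) = <psi_{j k} | S phi_m>, where |phi_m> are the Hubbard orbitals
           (hubbard_wave_functions_S() stores the orbitals with the overlap operator applied) */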
        wf::inner(ctx_.spla_context(), mem, wf::spin_range(0, 2), kp__.spinor_wave_functions(),
                  wf::band_range(0, kp__.num_occupied_bands()), kp__.hubbard_wave_functions_S(),
                  wf::band_range(0, nwfu), dm, 0, 0);

        la::dmatrix<std::complex<T>> dm1(kp__.num_occupied_bands(), nwfu, get_memory_pool(mem_host), "dm1");
        #pragma omp parallel for
        for (int m = 0; m < nwfu; m++) {
            for (int j = 0; j < kp__.num_occupied_bands(); j++) {
                dm1(j, m) = dm(j, m) * static_cast<T>(kp__.band_occupancy(j, 0));
            }
        }
        if (is_device_memory(mem)) {
            dm1.allocate(get_memory_pool(mem)).copy_to(mem);
        }

        /* now compute O_{ij}^{sigma,sigma'} = \sum_{nk} <psi_{nk}|phi_{i,sigma}> <phi_{j,sigma'}|psi_{nk}> f_{nk} */
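        /* note: unlike the collinear branch below, no 1/max_occupancy factor is needed here; for
           num_mag_dims() == 3 the band occupancies do not carry the spin-degeneracy factor 2 */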
        auto alpha = std::complex<T>(kp__.weight(), 0.0);
        la::wrap(la).gemm('C', 'N', nwfu, nwfu, kp__.num_occupied_bands(), &alpha, dm.at(mem), dm.ld(), dm1.at(mem),
                          dm1.ld(), &la::constant<std::complex<T>>::zero(), occ_mtrx.at(mem), occ_mtrx.ld());
        if (is_device_memory(mem)) {
            occ_mtrx.copy_to(sddk::memory_t::host);
        }

        #pragma omp parallel for schedule(static)
        for (int at_lvl = 0; at_lvl < static_cast<int>(local_.size()); at_lvl++) {
            const int ia     = atomic_orbitals_[at_lvl].first;
            auto const& atom = ctx_.unit_cell().atom(ia);
            // we can skip this atomic level if it does not contribute to the Hubbard correction (or U = 0)
            if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).use_for_calculation()) {

                /* loop over the different channels */
                /* note that for atoms with SO interaction, we need to jump
                   by 2 instead of 1. This is because the relativistic wave
                   functions have different total angular momenta for the same n */
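                /* mapping of the spin-block indices (s1, s2) to the storage index of local_:
                   (0,0) -> 0 (up-up), (1,1) -> 1 (down-down), (1,0) -> 2, (0,1) -> 3 (off-diagonal) */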
                int s_idx[2][2]    = {{0, 3}, {2, 1}};
                const int lmmax_at = 2 * atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).l() + 1;
                for (int s1 = 0; s1 < ctx_.num_spins(); s1++) {
                    for (int s2 = 0; s2 < ctx_.num_spins(); s2++) {
                        for (int mp = 0; mp < lmmax_at; mp++) {
                            for (int m = 0; m < lmmax_at; m++) {
                                local_[at_lvl](m, mp, s_idx[s1][s2]) +=
                                    occ_mtrx(r.first * s1 + offset_[at_lvl] + m, r.first * s2 + offset_[at_lvl] + mp);
                            }
                        }
                    }
                }
            }
        }
    } else {
        /* LSDA+U: we need to do the explicit calculation. The Hubbard
           orbitals only have one spin component while the Bloch wave functions
           have two. The inner product takes care of this case internally. */

        for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
            if (kp__.num_occupied_bands(ispn) == 0) {
                continue;
            }
            la::dmatrix<std::complex<T>> dm(kp__.num_occupied_bands(ispn), nwfu, get_memory_pool(mem_host), "dm");
            if (is_device_memory(mem)) {
                dm.allocate(get_memory_pool(mem));
            }
            /* compute <psi | S phi>, where |phi> are the Hubbard WFs */
            wf::inner(ctx_.spla_context(), mem, wf::spin_range(ispn), kp__.spinor_wave_functions(),
                      wf::band_range(0, kp__.num_occupied_bands(ispn)), kp__.hubbard_wave_functions_S(),
                      wf::band_range(0, nwfu), dm, 0, 0);

            la::dmatrix<std::complex<T>> dm1(kp__.num_occupied_bands(ispn), nwfu, get_memory_pool(mem_host), "dm1");
            #pragma omp parallel for
            for (int m = 0; m < nwfu; m++) {
                for (int j = 0; j < kp__.num_occupied_bands(ispn); j++) {
                    dm1(j, m) = dm(j, m) * static_cast<T>(kp__.band_occupancy(j, ispn));
                }
            }
            if (is_device_memory(mem)) {
                dm1.allocate(get_memory_pool(mem)).copy_to(mem);
            }
            /* now compute O_{ij}^{sigma,sigma'} = \sum_{nk} <psi_{nk}|phi_{i,sigma}> <phi_{j,sigma'}|psi_{nk}> f_{nk} */
            /* We need to apply a factor 1/2 when computing the occupancies for LDA+U, because the
             * expressions for E and U assume occupancies <= 1, while SIRIUS includes a factor 2
             * (spin degeneracy) in the band occupancies. We compensate for it here, since it is
             * already taken into account in the calculation of the Hubbard potential. */
            auto alpha = std::complex<T>(kp__.weight() / ctx_.max_occupancy(), 0.0);
            la::wrap(la).gemm('C', 'N', nwfu, nwfu, kp__.num_occupied_bands(ispn), &alpha, dm.at(mem), dm.ld(),
                              dm1.at(mem), dm1.ld(), &la::constant<std::complex<T>>::zero(), occ_mtrx.at(mem),
                              occ_mtrx.ld());
            if (is_device_memory(mem)) {
                occ_mtrx.copy_to(sddk::memory_t::host);
            }

            #pragma omp parallel for
            for (int at_lvl = 0; at_lvl < static_cast<int>(local_.size()); at_lvl++) {
                const int ia     = atomic_orbitals_[at_lvl].first;
                auto const& atom = ctx_.unit_cell().atom(ia);
                // we can skip this atomic level if it does not contribute to the Hubbard
                // correction (or U = 0)
                if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).use_for_calculation()) {

                    const int lmmax_at = 2 * atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).l() + 1;
                    for (int mp = 0; mp < lmmax_at; mp++) {
                        const int mmp = offset_[at_lvl] + mp;
                        for (int m = 0; m < lmmax_at; m++) {
                            const int mm = offset_[at_lvl] + m;
                            local_[at_lvl](m, mp, ispn) += occ_mtrx(mm, mmp);
                        }
                    }
                }
            }

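            /* accumulate the translation-resolved occupation matrices
               n_{ij}(T) += e^{-i k T} O_{ij}(k) (O already carries the k-point weight);
               they are used to symmetrize the nonlocal (inter-site) Hubbard correction */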
            for (auto& e : this->occ_mtrx_T_) {
                /* phase factor e^{-i k T} */
                auto z1 = std::exp(std::complex<double>(0, -twopi * dot(e.first, kp__.vk())));
                for (int i = 0; i < nwfu; i++) {
                    for (int j = 0; j < nwfu; j++) {
                        /* accumulate in double precision; casting z1 down to std::complex<T>
                           would lose accuracy for T = float */
                        e.second(i, j, ispn) += static_cast<std::complex<double>>(occ_mtrx(i, j)) * z1;
                    }
                }
            }
        } // ispn
    }
}

template void Occupation_matrix::add_k_point_contribution<double>(K_point<double>& kp__);
#ifdef SIRIUS_USE_FP32
template void Occupation_matrix::add_k_point_contribution<float>(K_point<float>& kp__);
#endif

void
Occupation_matrix::init()
{
    if (!ctx_.hubbard_correction()) {
        return;
    }

    this->zero();
    #pragma omp parallel for schedule(static)
    for (int at_lvl = 0; at_lvl < static_cast<int>(local_.size()); at_lvl++) {
        const int ia     = atomic_orbitals_[at_lvl].first;
        auto const& atom = ctx_.unit_cell().atom(ia);

        if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).use_for_calculation()) {

            int il            = atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).l();
            const int lmax_at = 2 * il + 1;
            if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).initial_occupancy().size()) {
                /* the occupancy is specified in the input file */
                for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                    for (int m = 0; m < lmax_at; m++) {
                        this->local_[at_lvl](m, m, ispn) = atom.type()
                                                               .lo_descriptor_hub(atomic_orbitals_[at_lvl].second)
                                                               .initial_occupancy()[m + ispn * lmax_at];
                    }
                }
            } else {
                // compute the total charge of the Hubbard orbitals
                double charge = atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).occupancy();
                bool nm       = true; // true if the atom is non-magnetic
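                /* majs/mins: storage indices of the majority and minority spin channels,
                   determined by the sign of the z component of the initial magnetic moment */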
                int majs, mins;
                if (ctx_.num_spins() != 1) {
                    if (atom.vector_field()[2] > 0.0) {
                        nm   = false;
                        majs = 0;
                        mins = 1;
                    } else if (atom.vector_field()[2] < 0.0) {
                        nm   = false;
                        majs = 1;
                        mins = 0;
                    }
                }

                if (!nm) {
                    if (ctx_.num_mag_dims() != 3) {
                        // collinear case
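                        /* Hund's-rule-like guess: fill each majority-spin orbital first; only the
                           charge that does not fit there spills into the minority channel */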
                        if (charge > (lmax_at)) {
                            for (int m = 0; m < lmax_at; m++) {
                                this->local_[at_lvl](m, m, majs) = 1.0;
                                this->local_[at_lvl](m, m, mins) =
                                    (charge - static_cast<double>(lmax_at)) / static_cast<double>(lmax_at);
                            }
                        } else {
                            for (int m = 0; m < lmax_at; m++) {
                                this->local_[at_lvl](m, m, majs) = charge / static_cast<double>(lmax_at);
                            }
                        }
                    } else {
                        // double c1, s1;
                        // sincos(atom.type().starting_magnetization_theta(), &s1, &c1);
                        double c1 = atom.vector_field()[2];
                        std::complex<double> cs =
                            std::complex<double>(atom.vector_field()[0], atom.vector_field()[1]) / sqrt(1.0 - c1 * c1);
                        std::complex<double> ns[4];

                        if (charge > (lmax_at)) {
                            ns[majs] = 1.0;
                            ns[mins] = (charge - static_cast<double>(lmax_at)) / static_cast<double>(lmax_at);
                        } else {
                            ns[majs] = charge / static_cast<double>(lmax_at);
                            ns[mins] = 0.0;
                        }

                        // charge and moment
                        double nc  = ns[majs].real() + ns[mins].real();
                        double mag = ns[majs].real() - ns[mins].real();

                        // rotate the occ matrix
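                        /* build the 2x2 spin density matrix n = (nc*I + mag*(sigma . m_hat)) / 2 in the
                           global spin frame; c1 plays the role of cos(theta) and cs of e^{i phi} of the
                           initial magnetization direction (components: 0 = uu, 1 = dd, 2/3 = off-diagonal) */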
                        ns[0] = (nc + mag * c1) * 0.5;
                        ns[1] = (nc - mag * c1) * 0.5;
                        ns[2] = mag * std::conj(cs) * 0.5;
                        ns[3] = mag * cs * 0.5;

                        for (int m = 0; m < lmax_at; m++) {
                            this->local_[at_lvl](m, m, 0) = ns[0];
                            this->local_[at_lvl](m, m, 1) = ns[1];
                            this->local_[at_lvl](m, m, 2) = ns[2];
                            this->local_[at_lvl](m, m, 3) = ns[3];
                        }
                    }
                } else {
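                    /* non-magnetic atom: distribute the charge equally between the two spin channels */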
                    for (int s = 0; s < ctx_.num_spins(); s++) {
                        for (int m = 0; m < lmax_at; m++) {
                            this->local_[at_lvl](m, m, s) = charge * 0.5 / static_cast<double>(lmax_at);
                        }
                    }
                }
            }
        }
    }

    print_occupancies(2);
}

void
Occupation_matrix::print_occupancies(int verbosity__) const
{
    if (!ctx_.hubbard_correction()) {
        return;
    }

    if (ctx_.comm().rank() == 0) {
        std::stringstream s;
        /* print local part */
        if (ctx_.verbosity() >= verbosity__) {
            for (int at_lvl = 0; at_lvl < static_cast<int>(local_.size()); at_lvl++) {
                auto const& atom = ctx_.unit_cell().atom(atomic_orbitals_[at_lvl].first);
                int il           = atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).l();
                if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).use_for_calculation()) {
                    Hubbard_matrix::print_local(at_lvl, s);
                    double occ[2] = {0, 0};
                    for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                        for (int m = 0; m < 2 * il + 1; m++) {
                            occ[ispn] += this->local_[at_lvl](m, m, ispn).real();
                        }
                    }
                    if (ctx_.num_spins() == 2) {
                        s << "Atom charge (total) " << occ[0] + occ[1] << " (n_up) " << occ[0] << " (n_down) "
                          << occ[1] << " (mz) " << occ[0] - occ[1] << std::endl;
                    } else {
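                        /* non-magnetic case: multiply by 2 to account for spin degeneracy */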
                        s << "Atom charge (total) " << 2 * occ[0] << std::endl;
                    }
                }
            }
        }
        /* print non-local part */
        if (ctx_.cfg().hubbard().nonlocal().size() && (ctx_.verbosity() >= verbosity__ + 1)) {
            s << std::endl;
            for (int i = 0; i < static_cast<int>(ctx_.cfg().hubbard().nonlocal().size()); i++) {
                Hubbard_matrix::print_nonlocal(i, s);
            }
        }
        if (ctx_.verbosity() >= verbosity__) {
            ctx_.message(1, "occ.mtrx", s);
        }
    }
}

} // namespace sirius