Occupation_matrix::Occupation_matrix(Simulation_context& ctx__)
    : Hubbard_matrix(ctx__)
{
    if (!ctx_.hubbard_correction()) {
        return;
    }
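    /* occ_mtrx_T_ maps every lattice translation that the symmetry operations can generate
       from the user-defined inter-site (nonlocal) Hubbard pairs to an (nhwf x nhwf x num_mag_comp)
       block of the occupation matrix; these blocks are filled per k-point in
       add_k_point_contribution() and are used when the nonlocal occupancies are symmetrized. */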
    int nhwf = ctx_.unit_cell().num_hubbard_wf().first;
    for (int i = 0; i < ctx_.cfg().hubbard().nonlocal().size(); i++) {
        auto nl = ctx_.cfg().hubbard().nonlocal(i);
        int ia  = nl.atom_pair()[0];
        int ja  = nl.atom_pair()[1];
        /* translation vector connecting the atom pair (accessor assumed; its definition is
           not part of this listing) */
        auto T = nl.T();

        auto& sym = ctx_.unit_cell().symmetry();

        for (int isym = 0; isym < sym.size(); isym++) {
            /* translation of the pair mapped back through the inverse symmetry operation */
            auto Ttot = sym[isym].spg_op.inv_sym_atom_T[ja] - sym[isym].spg_op.inv_sym_atom_T[ia] +
                        dot(sym[isym].spg_op.invR, r3::vector<int>(T));
            if (!occ_mtrx_T_.count(Ttot)) {
                occ_mtrx_T_[Ttot] = sddk::mdarray<std::complex<double>, 3>(nhwf, nhwf, ctx_.num_mag_comp());
                occ_mtrx_T_[Ttot].zero();
            }
        }
    }
}
template <typename T>
void
Occupation_matrix::add_k_point_contribution(K_point<T>& kp__)
{
    if (!ctx_.hubbard_correction()) {
        return;
    }
    PROFILE("sirius::Occupation_matrix::add_k_point_contribution");
    sddk::memory_t mem_host{sddk::memory_t::host};
    sddk::memory_t mem{sddk::memory_t::host};
    la::lib_t la{la::lib_t::blas};
    if (ctx_.processing_unit() == sddk::device_t::GPU) {
        mem      = sddk::memory_t::device;
        mem_host = sddk::memory_t::host_pinned;
        la       = la::lib_t::gpublas;
    }
    /* total number of Hubbard wave functions and their per-atom offsets */
    auto r = ctx_.unit_cell().num_hubbard_wf();
    /* leading dimension of the occupation matrix: total number of Hubbard wave functions,
       counting both spinor components in the non-collinear case (assumed; the defining line
       is missing from this listing) */
    int nwfu = (ctx_.num_mag_dims() == 3) ? 2 * r.first : r.first;
    sddk::matrix<std::complex<T>> occ_mtrx(nwfu, nwfu, get_memory_pool(sddk::memory_t::host), "occ_mtrx");
    if (is_device_memory(mem)) {
        occ_mtrx.allocate(get_memory_pool(mem));
    }
    /* full non-collinear magnetization */
    if (ctx_.num_mag_dims() == 3) {
        la::dmatrix<std::complex<T>> dm(kp__.num_occupied_bands(), nwfu, get_memory_pool(mem_host), "dm");
        if (is_device_memory(mem)) {
            dm.allocate(get_memory_pool(mem));
        }
        /* compute <psi_nk|S|phi_m> for all occupied bands and all Hubbard wave functions */
        wf::inner(ctx_.spla_context(), mem, wf::spin_range(0, 2), kp__.spinor_wave_functions(),
                  wf::band_range(0, kp__.num_occupied_bands()), kp__.hubbard_wave_functions_S(),
                  wf::band_range(0, nwfu), dm, 0, 0);
        la::dmatrix<std::complex<T>> dm1(kp__.num_occupied_bands(), nwfu, get_memory_pool(mem_host), "dm1");
        #pragma omp parallel for
        for (int m = 0; m < nwfu; m++) {
            for (int j = 0; j < kp__.num_occupied_bands(); j++) {
                /* scale the projections by the band occupancies */
                dm1(j, m) = dm(j, m) * static_cast<T>(kp__.band_occupancy(j, 0));
            }
        }
        if (is_device_memory(mem)) {
            dm1.allocate(get_memory_pool(mem)).copy_to(mem);
        }
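        /* The gemm below forms, up to the k-point weight, the standard DFT+U occupation
           matrix for this k-point:
               n_{m,m'} = sum_j f_j <phi_m|S|psi_j><psi_j|S|phi_m'>,
           i.e. occ_mtrx = dm^H * dm1 with dm1 = f * dm (a sketch of the convention; the exact
           normalization is set by alpha below). */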
        auto alpha = std::complex<T>(kp__.weight(), 0.0);
        la::wrap(la).gemm('C', 'N', nwfu, nwfu, kp__.num_occupied_bands(), &alpha, dm.at(mem), dm.ld(), dm1.at(mem),
                          dm1.ld(), &la::constant<std::complex<T>>::zero(), occ_mtrx.at(mem), occ_mtrx.ld());
        if (is_device_memory(mem)) {
            occ_mtrx.copy_to(sddk::memory_t::host);
        }
        #pragma omp parallel for schedule(static)
        for (int at_lvl = 0; at_lvl < static_cast<int>(local_.size()); at_lvl++) {
            const int ia     = atomic_orbitals_[at_lvl].first;
            auto const& atom = ctx_.unit_cell().atom(ia);
            if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).use_for_calculation()) {
                int s_idx[2][2]    = {{0, 3}, {2, 1}};
                const int lmmax_at = 2 * atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).l() + 1;
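                /* s_idx maps a pair of spinor indices (s1, s2) to the storage index of the
                   corresponding spin block of local_: 0 = up-up, 1 = down-down, and 2, 3 the two
                   off-diagonal blocks (assumed to follow the usual SIRIUS density-matrix ordering). */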
                for (int s1 = 0; s1 < ctx_.num_spins(); s1++) {
                    for (int s2 = 0; s2 < ctx_.num_spins(); s2++) {
                        for (int mp = 0; mp < lmmax_at; mp++) {
                            for (int m = 0; m < lmmax_at; m++) {
                                local_[at_lvl](m, mp, s_idx[s1][s2]) +=
                                    occ_mtrx(r.first * s1 + offset_[at_lvl] + m, r.first * s2 + offset_[at_lvl] + mp);
                            }
                        }
                    }
                }
            }
        }
    } else {
        /* collinear or non-magnetic case */
        for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
            /* nothing to do if this spin channel has no occupied bands */
            if (kp__.num_occupied_bands(ispn) == 0) {
                continue;
            }
            la::dmatrix<std::complex<T>> dm(kp__.num_occupied_bands(ispn), nwfu, get_memory_pool(mem_host), "dm");
            if (is_device_memory(mem)) {
                dm.allocate(get_memory_pool(mem));
            }
            /* compute <psi_nk|S|phi_m> for the occupied bands of this spin channel */
            wf::inner(ctx_.spla_context(), mem, wf::spin_range(ispn), kp__.spinor_wave_functions(),
                      wf::band_range(0, kp__.num_occupied_bands(ispn)), kp__.hubbard_wave_functions_S(),
                      wf::band_range(0, nwfu), dm, 0, 0);
            la::dmatrix<std::complex<T>> dm1(kp__.num_occupied_bands(ispn), nwfu, get_memory_pool(mem_host), "dm1");
            #pragma omp parallel for
            for (int m = 0; m < nwfu; m++) {
                for (int j = 0; j < kp__.num_occupied_bands(ispn); j++) {
                    dm1(j, m) = dm(j, m) * static_cast<T>(kp__.band_occupancy(j, ispn));
                }
            }
            if (is_device_memory(mem)) {
                dm1.allocate(get_memory_pool(mem)).copy_to(mem);
            }
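            /* Same contraction as in the non-collinear branch, but the k-point weight is divided
               by the maximum band occupancy (2 in spin-unpolarized calculations) so that the
               diagonal of the occupation matrix stays bounded by one. */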
            auto alpha = std::complex<T>(kp__.weight() / ctx_.max_occupancy(), 0.0);
            la::wrap(la).gemm('C', 'N', nwfu, nwfu, kp__.num_occupied_bands(ispn), &alpha, dm.at(mem), dm.ld(),
                              dm1.at(mem), dm1.ld(), &la::constant<std::complex<T>>::zero(), occ_mtrx.at(mem),
                              occ_mtrx.ld());
            if (is_device_memory(mem)) {
                occ_mtrx.copy_to(sddk::memory_t::host);
            }
            #pragma omp parallel for
            for (int at_lvl = 0; at_lvl < static_cast<int>(local_.size()); at_lvl++) {
                const int ia     = atomic_orbitals_[at_lvl].first;
                auto const& atom = ctx_.unit_cell().atom(ia);
                if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).use_for_calculation()) {
                    const int lmmax_at = 2 * atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).l() + 1;
                    for (int mp = 0; mp < lmmax_at; mp++) {
                        const int mmp = offset_[at_lvl] + mp;
                        for (int m = 0; m < lmmax_at; m++) {
                            const int mm = offset_[at_lvl] + m;
                            /* copy the on-site block of occ_mtrx into the local occupation matrix */
                            local_[at_lvl](m, mp, ispn) += occ_mtrx(mm, mmp);
                        }
                    }
                }
            }
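            /* Accumulate the translation-resolved blocks needed for the inter-site (nonlocal)
               occupancies: each block picks up the Bloch phase exp(-i 2*pi T.k) of its translation
               vector, so that the sum over k-points yields the real-space occupation matrix n(T)
               that is later symmetrized. */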
            for (auto& e : this->occ_mtrx_T_) {
                auto z1 = std::exp(std::complex<double>(0, -twopi * dot(e.first, kp__.vk())));
                for (int i = 0; i < nwfu; i++) {
                    for (int j = 0; j < nwfu; j++) {
                        e.second(i, j, ispn) +=
                            static_cast<std::complex<T>>(occ_mtrx(i, j)) * static_cast<std::complex<T>>(z1);
                    }
                }
            }
        }
    }
}
template void Occupation_matrix::add_k_point_contribution<double>(K_point<double>& kp__);
#ifdef SIRIUS_USE_FP32
template void Occupation_matrix::add_k_point_contribution<float>(K_point<float>& kp__);
#endif
void
Occupation_matrix::init()
{
    if (!ctx_.hubbard_correction()) {
        return;
    }
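    /* Build a starting guess for the on-site occupation matrices: either take the occupancies
       given in the input (initial_occupancy) or distribute the atomic Hubbard occupancy over the
       spin channels according to the starting magnetic moment of the atom (vector_field). */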
    #pragma omp parallel for schedule(static)
    for (int at_lvl = 0; at_lvl < static_cast<int>(local_.size()); at_lvl++) {
        const int ia     = atomic_orbitals_[at_lvl].first;
        auto const& atom = ctx_.unit_cell().atom(ia);
        if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).use_for_calculation()) {
            int il            = atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).l();
            const int lmax_at = 2 * il + 1;
            if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).initial_occupancy().size()) {
                /* occupancy provided explicitly in the input */
                for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                    for (int m = 0; m < lmax_at; m++) {
                        this->local_[at_lvl](m, m, ispn) = atom.type()
                                                               .lo_descriptor_hub(atomic_orbitals_[at_lvl].second)
                                                               .initial_occupancy()[m + ispn * lmax_at];
                    }
                }
            } else {
                double charge = atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).occupancy();

                /* majority / minority spin channels; the atom is treated as non-magnetic unless a
                   starting moment is set (these declarations are reconstructed, they are missing
                   from this listing) */
                bool nm{true};
                int majs{0};
                int mins{1};
                if (ctx_.num_spins() != 1) {
                    if (atom.vector_field()[2] > 0.0) {
                        nm   = false;
                        majs = 0;
                        mins = 1;
                    } else if (atom.vector_field()[2] < 0.0) {
                        nm   = false;
                        majs = 1;
                        mins = 0;
                    }
                }

                if (!nm) {
                    if (ctx_.num_mag_dims() != 3) {
                        /* collinear case */
                        if (charge > (lmax_at)) {
                            for (int m = 0; m < lmax_at; m++) {
                                this->local_[at_lvl](m, m, majs) = 1.0;
                                this->local_[at_lvl](m, m, mins) =
                                    (charge - static_cast<double>(lmax_at)) / static_cast<double>(lmax_at);
                            }
                        } else {
                            for (int m = 0; m < lmax_at; m++) {
                                this->local_[at_lvl](m, m, majs) = charge / static_cast<double>(lmax_at);
                            }
                        }
                    } else {
                        /* non-collinear case: compute the occupancy along the local quantization
                           axis and rotate it into the global spin frame */
                        double c1 = atom.vector_field()[2];
                        std::complex<double> cs =
                            std::complex<double>(atom.vector_field()[0], atom.vector_field()[1]) / sqrt(1.0 - c1 * c1);
                        std::complex<double> ns[4];

                        if (charge > (lmax_at)) {
                            ns[majs] = 1.0;
                            ns[mins] = (charge - static_cast<double>(lmax_at)) / static_cast<double>(lmax_at);
                        } else {
                            ns[majs] = charge / static_cast<double>(lmax_at);
                            ns[mins] = 0.0;
                        }

                        /* total charge and magnetization of the shell */
                        double nc  = ns[majs].real() + ns[mins].real();
                        double mag = ns[majs].real() - ns[mins].real();
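                        /* Rotate the (charge, moment) pair from the local quantization axis into
                           the global spin frame: the diagonal blocks get (nc +/- mag*cos(theta))/2
                           and the off-diagonal blocks pick up the phase exp(+/- i*phi) of the
                           in-plane moment (a sketch of the convention; c1 and cs encode cos(theta)
                           and exp(i*phi)). */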
                        ns[0] = (nc + mag * c1) * 0.5;
                        ns[1] = (nc - mag * c1) * 0.5;
                        ns[2] = mag * std::conj(cs) * 0.5; /* Hermitian counterpart of ns[3]; reconstructed */
                        ns[3] = mag * cs * 0.5;

                        for (int m = 0; m < lmax_at; m++) {
                            this->local_[at_lvl](m, m, 0) = ns[0];
                            this->local_[at_lvl](m, m, 1) = ns[1];
                            this->local_[at_lvl](m, m, 2) = ns[2];
                            this->local_[at_lvl](m, m, 3) = ns[3];
                        }
                    }
                } else {
                    /* non-magnetic atom: split the charge equally between the spin channels */
                    for (int s = 0; s < ctx_.num_spins(); s++) {
                        for (int m = 0; m < lmax_at; m++) {
                            this->local_[at_lvl](m, m, s) = charge * 0.5 / static_cast<double>(lmax_at);
                        }
                    }
                }
            }
        }
    }

    print_occupancies(2);
}
void
Occupation_matrix::print_occupancies(int verbosity__) const
{
    if (!ctx_.hubbard_correction()) {
        return;
    }

    if (ctx_.comm().rank() == 0) {
        std::stringstream s; /* output buffer passed to print_local(), print_nonlocal() and ctx_.message() */
        if (ctx_.verbosity() >= verbosity__) {
            for (int at_lvl = 0; at_lvl < static_cast<int>(local_.size()); at_lvl++) {
                auto const& atom = ctx_.unit_cell().atom(atomic_orbitals_[at_lvl].first);
                int il           = atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).l();
                if (atom.type().lo_descriptor_hub(atomic_orbitals_[at_lvl].second).use_for_calculation()) {
                    Hubbard_matrix::print_local(at_lvl, s);
                    /* the trace of each spin block gives the channel occupation */
                    double occ[2] = {0, 0};
                    for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
                        for (int m = 0; m < 2 * il + 1; m++) {
                            occ[ispn] += this->local_[at_lvl](m, m, ispn).real();
                        }
                    }
                    if (ctx_.num_spins() == 2) {
                        s << "Atom charge (total) " << occ[0] + occ[1] << " (n_up) " << occ[0] << " (n_down) "
                          << occ[1] << " (mz) " << occ[0] - occ[1] << std::endl;
                    } else {
                        s << "Atom charge (total) " << 2 * occ[0] << std::endl;
                    }
                }
            }
        }
        if (ctx_.cfg().hubbard().nonlocal().size() && (ctx_.verbosity() >= verbosity__ + 1)) {
            for (int i = 0; i < static_cast<int>(ctx_.cfg().hubbard().nonlocal().size()); i++) {
                Hubbard_matrix::print_nonlocal(i, s);
            }
        }
        if (ctx_.verbosity() >= verbosity__) {
            ctx_.message(1, "occ.mtrx", s);
        }
    }
}