SIRIUS 7.5.0
Electronic structure library and applications
s_u_operator.cpp

namespace sirius {

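/** Construct the k-dependent U operator.
 *  The on-site (local) blocks of the Hubbard matrix are copied into dense spin-block
 *  matrices in the Hubbard-orbital basis; the inter-site (nonlocal) blocks are added
 *  with the Bloch phase factor of the connecting lattice translation; Hermiticity of
 *  the result is checked at the end. */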
template <class T>
U_operator<T>::U_operator(Simulation_context const& ctx__, Hubbard_matrix const& um1__, std::array<double, 3> vk__)
    : ctx_(ctx__)
    , vk_(vk__)
{
    if (!ctx_.hubbard_correction()) {
        return;
    }
    /* a pair of "total number, offsets" for the Hubbard orbitals indexing */
    auto r = ctx_.unit_cell().num_hubbard_wf();
    this->nhwf_ = r.first;
    this->offset_ = um1__.offset();
    this->atomic_orbitals_ = um1__.atomic_orbitals();
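    /* num_mag_dims() + 1 spin-block components: 1 (non-magnetic), 2 (collinear), 4 (non-collinear) */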
    for (int j = 0; j < ctx_.num_mag_dims() + 1; j++) {
        um_[j] = la::dmatrix<std::complex<T>>(r.first, r.first);
        um_[j].zero();
    }

    /* copy local blocks */
    for (int at_lvl = 0; at_lvl < static_cast<int>(um1__.atomic_orbitals().size()); at_lvl++) {
        const int ia = um1__.atomic_orbitals(at_lvl).first;
        auto& atom_type = ctx_.unit_cell().atom(ia).type();
        int lo_ind = um1__.atomic_orbitals(at_lvl).second;
        if (atom_type.lo_descriptor_hub(lo_ind).use_for_calculation()) {
            int lmmax_at = 2 * atom_type.lo_descriptor_hub(lo_ind).l() + 1;
            for (int j = 0; j < ctx_.num_mag_dims() + 1; j++) {
                for (int m2 = 0; m2 < lmmax_at; m2++) {
                    for (int m1 = 0; m1 < lmmax_at; m1++) {
                        um_[j](um1__.offset(at_lvl) + m1, um1__.offset(at_lvl) + m2) = um1__.local(at_lvl)(m1, m2, j);
                    }
                }
            }
        }
    }

    for (int i = 0; i < static_cast<int>(ctx_.cfg().hubbard().nonlocal().size()); i++) {
        auto nl = ctx_.cfg().hubbard().nonlocal(i);
        int ia = nl.atom_pair()[0];
        int ja = nl.atom_pair()[1];
        int il = nl.l()[0];
        int jl = nl.l()[1];
        auto Tr = nl.T();

        /* we need to find the index of the radial function corresponding to the atomic level of each atom */
        int at1_lvl = um1__.find_orbital_index(ia, nl.n()[0], il);
        int at2_lvl = um1__.find_orbital_index(ja, nl.n()[1], jl);

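        /* Bloch phase factor e^{i 2 pi k.T} of the lattice translation T connecting the atom pair */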
        auto z1 = std::exp(std::complex<double>(0, twopi * dot(vk_, r3::vector<int>(Tr))));
        for (int is = 0; is < ctx_.num_spins(); is++) {
            for (int m1 = 0; m1 < 2 * il + 1; m1++) {
                for (int m2 = 0; m2 < 2 * jl + 1; m2++) {
                    um_[is](um1__.offset(at1_lvl) + m1, um1__.offset(at2_lvl) + m2) +=
                            z1 * um1__.nonlocal(i)(m1, m2, is);
                }
            }
        }
    }
    for (int is = 0; is < ctx_.num_spins(); is++) {
        auto diff = check_hermitian(um_[is], r.first);
        if (diff > 1e-10) {
            RTE_THROW("um is not Hermitian");
        }
        if (env::print_checksum()) {
            print_checksum("um" + std::to_string(is), um_[is].checksum(r.first, r.first), RTE_OUT(ctx_.out()));
        }
        if (ctx_.processing_unit() == sddk::device_t::GPU) {
            um_[is].allocate(get_memory_pool(sddk::memory_t::device)).copy_to(sddk::memory_t::device);
        }
    }
}

template <class T>
int
U_operator<T>::find_orbital_index(const int ia__, const int n__, const int l__) const
{
    int at_lvl = 0;
    for (at_lvl = 0; at_lvl < static_cast<int>(atomic_orbitals_.size()); at_lvl++) {
        int lo_ind = atomic_orbitals_[at_lvl].second;
        int atom_id = atomic_orbitals_[at_lvl].first;
        if ((atomic_orbitals_[at_lvl].first == ia__) &&
            (ctx_.unit_cell().atom(atom_id).type().lo_descriptor_hub(lo_ind).n() == n__) &&
            (ctx_.unit_cell().atom(atom_id).type().lo_descriptor_hub(lo_ind).l() == l__))
            break;
    }

    if (at_lvl == static_cast<int>(atomic_orbitals_.size())) {
        std::cout << "atom: " << ia__ << ", n: " << n__ << ", l: " << l__ << std::endl;
        RTE_THROW("orbital is not in the list of Hubbard orbitals\n");
    }
    return at_lvl;
}

template class U_operator<double>;
#if defined(SIRIUS_USE_FP32)
template class U_operator<float>;
#endif

/** Apply Hubbard U correction.
 * \tparam T Precision type of wave-functions (float or double).
 * \param [in] ctx Simulation context.
 * \param [in] spins Range of spin indices.
 * \param [in] br Range of bands to which the correction is applied.
 * \param [in] hub_wf Hubbard atomic wave-functions.
 * \param [in] phi Set of wave-functions to which the Hubbard correction is applied.
 * \param [in] um U operator matrix.
 * \param [out] hphi Output wave-functions to which the result is added.
 */
template <typename T>
void
apply_U_operator(Simulation_context& ctx__, wf::spin_range spins__, wf::band_range br__,
                 wf::Wave_functions<T> const& hub_wf__, wf::Wave_functions<T> const& phi__,
                 U_operator<T> const& um__, wf::Wave_functions<T>& hphi__)
{
    if (!ctx__.hubbard_correction()) {
        return;
    }

    la::dmatrix<std::complex<T>> dm(hub_wf__.num_wf().get(), br__.size());

    auto mt = ctx__.processing_unit_memory_t();
    auto la = la::lib_t::blas;
    if (is_device_memory(mt)) {
        la = la::lib_t::gpublas;
        dm.allocate(mt);
    }

    /* first calculate the local part of the projections:
       dm(i, n) = <phi_i| S |psi_{nk}> */
    wf::inner(ctx__.spla_context(), mt, spins__, hub_wf__, wf::band_range(0, hub_wf__.num_wf().get()), phi__, br__, dm,
              0, 0);

    la::dmatrix<std::complex<T>> Up(hub_wf__.num_wf().get(), br__.size());
    if (is_device_memory(mt)) {
        Up.allocate(mt);
    }

    if (ctx__.num_mag_dims() == 3) {
        Up.zero();
        #pragma omp parallel for schedule(static)
        for (int at_lvl = 0; at_lvl < static_cast<int>(um__.atomic_orbitals().size()); at_lvl++) {
            const int ia = um__.atomic_orbitals(at_lvl).first;
            auto const& atom = ctx__.unit_cell().atom(ia);
            if (atom.type().lo_descriptor_hub(um__.atomic_orbitals(at_lvl).second).use_for_calculation()) {
                const int lmax_at = 2 * atom.type().lo_descriptor_hub(um__.atomic_orbitals(at_lvl).second).l() + 1;
                /* apply the Hubbard correction; no published formula is available for the
                   spin-orbit coupled case, so we follow the QE implementation here */
                for (int s1 = 0; s1 < ctx__.num_spins(); s1++) {
                    for (int s2 = 0; s2 < ctx__.num_spins(); s2++) {
                        // TODO: replace this with matrix matrix multiplication
                        for (int nbd = 0; nbd < br__.size(); nbd++) {
                            for (int m1 = 0; m1 < lmax_at; m1++) {
                                for (int m2 = 0; m2 < lmax_at; m2++) {
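                                    /* spin-block index of um__: diagonal blocks (s1 == s2) -> 0 or 1;
                                       spin-flip blocks (s1 != s2): (s1, s2) = (0, 1) -> 3, (1, 0) -> 2 */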
                                    const int ind = (s1 == s2) * s1 + (1 + 2 * s2 + s1) * (s1 != s2);
                                    Up(um__.nhwf() * s1 + um__.offset(at_lvl) + m1, nbd) +=
                                            um__(um__.offset(at_lvl) + m2, um__.offset(at_lvl) + m1, ind) *
                                            dm(um__.nhwf() * s2 + um__.offset(at_lvl) + m2, nbd);
                                }
                            }
                        }
                    }
                }
            }
        }
    } else {
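        /* collinear or non-magnetic case: a single gemm computes Up = U * dm for the spin
           channel at the start of the spin range */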
        la::wrap(la).gemm('N', 'N', um__.nhwf(), br__.size(), um__.nhwf(), &la::constant<std::complex<T>>::one(),
                          um__.at(mt, 0, 0, spins__.begin().get()), um__.nhwf(), dm.at(mt, 0, 0), dm.ld(),
                          &la::constant<std::complex<T>>::zero(), Up.at(mt, 0, 0), Up.ld());
        if (is_device_memory(mt)) {
            Up.copy_to(sddk::memory_t::host);
        }
    }
    for (auto s = spins__.begin(); s != spins__.end(); s++) {
        auto sp = hub_wf__.actual_spin_index(s);
        auto sp1 = hphi__.actual_spin_index(s);
        wf::transform(ctx__.spla_context(), mt, Up, 0, 0, 1.0, hub_wf__, sp, wf::band_range(0, hub_wf__.num_wf().get()),
                      1.0, hphi__, sp1, br__);
    }
}

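/* Usage sketch (illustrative only; ctx, hubbard_matrix, kp, hub_wf, phi, hphi and
 * num_bands are assumptions standing in for the caller's objects, not part of this file):
 *
 *   U_operator<double> um(ctx, hubbard_matrix, kp.vk());
 *   apply_U_operator(ctx, wf::spin_range(0), wf::band_range(0, num_bands),
 *                    hub_wf, phi, um, hphi);
 */
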
template void apply_U_operator<double>(Simulation_context&, wf::spin_range, wf::band_range,
                                       const wf::Wave_functions<double>&, const wf::Wave_functions<double>&,
                                       U_operator<double> const&, wf::Wave_functions<double>&);

#ifdef SIRIUS_USE_FP32
template void apply_U_operator<float>(Simulation_context&, wf::spin_range, wf::band_range,
                                      const wf::Wave_functions<float>&, const wf::Wave_functions<float>&,
                                      U_operator<float> const&, wf::Wave_functions<float>&);
#endif

/// Apply strain derivative of S-operator to all scalar functions.
void
apply_S_operator_strain_deriv(sddk::memory_t mem__, int comp__, Beta_projector_generator<double>& bp__,
                              beta_projectors_coeffs_t<double>& bp_coeffs__,
                              Beta_projector_generator<double>& bp_strain_deriv__,
                              beta_projectors_coeffs_t<double>& bp_strain_deriv_coeffs__,
                              wf::Wave_functions<double>& phi__, Q_operator<double>& q_op__,
                              wf::Wave_functions<double>& ds_phi__)
{
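    /* For the S-operator S = 1 + \sum_{ij} |beta_i> Q_{ij} <beta_j|, the beta-projector part of
       the strain derivative follows the product rule, and the two q_op__.apply() calls in the
       loop below accumulate the terms |beta_i> Q_{ij} <dbeta_j| and |dbeta_i> Q_{ij} <beta_j| */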
    if (sddk::is_device_memory(mem__)) {
        RTE_ASSERT((bp__.device_t() == sddk::device_t::GPU));
    }
    // NOTE: Beta_projectors_generator knows the target memory!
    using complex_t = std::complex<double>;

    RTE_ASSERT(ds_phi__.num_wf() == phi__.num_wf());
    for (int ichunk = 0; ichunk < bp__.num_chunks(); ichunk++) {
        /* generate beta-projectors for a block of atoms */
        bp__.generate(bp_coeffs__, ichunk);
        /* generate derived beta-projectors for a block of atoms */
        bp_strain_deriv__.generate(bp_strain_deriv_coeffs__, ichunk, comp__);

        auto host_mem = bp__.ctx().host_memory_t();
        auto& spla_ctx = bp__.ctx().spla_context();
        auto band_range_phi = wf::band_range(0, phi__.num_wf().get());
        bool result_on_device = bp__.ctx().processing_unit() == sddk::device_t::GPU;
        auto dbeta_phi = inner_prod_beta<complex_t>(spla_ctx, mem__, host_mem, result_on_device,
                                                    bp_strain_deriv_coeffs__, phi__, wf::spin_index(0), band_range_phi);
        auto beta_phi = inner_prod_beta<complex_t>(spla_ctx, mem__, host_mem, result_on_device, bp_coeffs__, phi__,
                                                   wf::spin_index(0), band_range_phi);

        auto band_range = wf::band_range(0, ds_phi__.num_wf().get());
        q_op__.apply(mem__, ichunk, 0, ds_phi__, band_range, bp_coeffs__, dbeta_phi);
        q_op__.apply(mem__, ichunk, 0, ds_phi__, band_range, bp_strain_deriv_coeffs__, beta_phi);
    }
}

} // namespace sirius