template <typename T>
Non_local_operator<T>::Non_local_operator(Simulation_context const& ctx__)
    : ctx_(ctx__)
{
    PROFILE("sirius::Non_local_operator");

    pu_      = this->ctx_.processing_unit();
    auto& uc = this->ctx_.unit_cell();

    packed_mtrx_offset_ = mdarray<int, 1>(uc.num_atoms());
    packed_mtrx_size_   = 0;
    for (int ia = 0; ia < uc.num_atoms(); ia++) {
        int nbf                 = uc.atom(ia).mt_basis_size();
        packed_mtrx_offset_(ia) = packed_mtrx_size_;
        packed_mtrx_size_ += nbf * nbf;
    }

    /* mirror the offsets on the device for GPU runs */
    if (pu_ == device_t::GPU) {
        packed_mtrx_offset_.allocate(memory_t::device).copy_to(memory_t::device);
    }
}
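
/* Packed storage sketch (read off from the indexing used in the initialize() functions below,
   not an additional interface): the nbf x nbf matrix of atom ia is stored as a contiguous block
   starting at packed_mtrx_offset_(ia); element (xi1, xi2) of spin channel ispn sits at linear
   index xi2 * nbf + xi1, i.e.

       op_(0, packed_mtrx_offset_(ia) + xi2 * nbf + xi1, ispn)   // real part
       op_(1, packed_mtrx_offset_(ia) + xi2 * nbf + xi1, ispn)   // imaginary part (complex storage only)
*/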
template <typename T>
void
D_operator<T>::initialize()
{
    PROFILE("sirius::D_operator::initialize");

    auto& uc = this->ctx_.unit_cell();

    /* mapping of the (sigma, sigma') spin indices onto the packed spin-block index */
    const int s_idx[2][2] = {{0, 3}, {2, 1}};

    #pragma omp parallel for
    for (int ia = 0; ia < uc.num_atoms(); ia++) {
        auto& atom = uc.atom(ia);
        int nbf    = atom.mt_basis_size();
        auto& dion = atom.type().d_mtrx_ion();

        /* with spin-orbit coupling the D-matrix is constructed in the spinor (up, dn) basis and is complex */
        if (uc.atom(ia).type().spin_orbit_coupling()) {
            mdarray<std::complex<T>, 3> d_mtrx_so(nbf, nbf, 4);
            /* transform the D-matrix into the spinor (up, dn) basis */
            for (int xi2 = 0; xi2 < nbf; xi2++) {
                for (int xi1 = 0; xi1 < nbf; xi1++) {
                    for (int sigma = 0; sigma < 2; sigma++) {
                        for (int sigmap = 0; sigmap < 2; sigmap++) {
                            std::complex<T> result(0, 0);
                            for (auto xi2p = 0; xi2p < nbf; xi2p++) {
                                if (atom.type().compare_index_beta_functions(xi2, xi2p)) {
                                    for (auto xi1p = 0; xi1p < nbf; xi1p++) {
                                        if (atom.type().compare_index_beta_functions(xi1, xi1p)) {
                                            /* contract the {I, Z, X, Y} components of the D-matrix
                                               with the Pauli matrices (stored in the same order) */
                                            for (int alpha = 0; alpha < 4; alpha++) {
                                                for (int sigma1 = 0; sigma1 < 2; sigma1++) {
                                                    for (int sigma2 = 0; sigma2 < 2; sigma2++) {
                                                        result += atom.d_mtrx(xi1p, xi2p, alpha) *
                                                                  pauli_matrix[alpha][sigma1][sigma2] *
                                                                  atom.type().f_coefficients(xi1, xi1p, sigma, sigma1) *
                                                                  atom.type().f_coefficients(xi2p, xi2, sigma2, sigmap);
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                            d_mtrx_so(xi1, xi2, s_idx[sigma][sigmap]) = result;
                        }
                    }
                }
            }
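
            /* Sketch of the contraction performed above, with sigma^alpha the Pauli matrices in
               {I, Z, X, Y} order and f the spinor transformation coefficients:

                   D(sigma, sigma')_{xi1 xi2} = sum_{xi1', xi2', alpha, sigma1, sigma2}
                       D(alpha)_{xi1' xi2'} * sigma^alpha_{sigma1 sigma2}
                       * f(sigma, sigma1)_{xi1 xi1'} * f(sigma2, sigma')_{xi2' xi2}

               The primed sums run only over beta-projectors with matching orbital character
               (cf. compare_index_beta_functions); the (sigma, sigma') result is packed into
               spin block s_idx[sigma][sigma']. */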
            /* add the ionic contribution; with spin-orbit coupling it also enters the off-diagonal spin blocks */
            for (int xi2 = 0; xi2 < nbf; xi2++) {
                int idxrf2 = atom.type().indexb(xi2).idxrf;
                for (int xi1 = 0; xi1 < nbf; xi1++) {
                    int idxrf1 = atom.type().indexb(xi1).idxrf;
                    if (atom.type().indexb(xi1).am == atom.type().indexb(xi2).am) {
                        /* up-up and down-down blocks */
                        d_mtrx_so(xi1, xi2, 0) += dion(idxrf1, idxrf2) * atom.type().f_coefficients(xi1, xi2, 0, 0);
                        d_mtrx_so(xi1, xi2, 1) += dion(idxrf1, idxrf2) * atom.type().f_coefficients(xi1, xi2, 1, 1);
                        /* up-down and down-up blocks */
                        d_mtrx_so(xi1, xi2, 2) += dion(idxrf1, idxrf2) * atom.type().f_coefficients(xi1, xi2, 0, 1);
                        d_mtrx_so(xi1, xi2, 3) += dion(idxrf1, idxrf2) * atom.type().f_coefficients(xi1, xi2, 1, 0);
                    }
                }
            }
            /* store the complex spin-orbit D-matrix as (real, imag) pairs in the packed array */
            for (int xi2 = 0; xi2 < nbf; xi2++) {
                for (int xi1 = 0; xi1 < nbf; xi1++) {
                    int idx = xi2 * nbf + xi1;
                    for (int s = 0; s < 4; s++) {
                        this->op_(0, this->packed_mtrx_offset_(ia) + idx, s) = d_mtrx_so(xi1, xi2, s).real();
                        this->op_(1, this->packed_mtrx_offset_(ia) + idx, s) = d_mtrx_so(xi1, xi2, s).imag();
                    }
                }
            }
        } else {
            /* no spin-orbit coupling for this atom type */
            for (int xi2 = 0; xi2 < nbf; xi2++) {
                int lm2    = atom.type().indexb(xi2).lm;
                int idxrf2 = atom.type().indexb(xi2).idxrf;
                for (int xi1 = 0; xi1 < nbf; xi1++) {
                    int lm1    = atom.type().indexb(xi1).lm;
                    int idxrf1 = atom.type().indexb(xi1).idxrf;

                    int idx = xi2 * nbf + xi1;
                    switch (this->ctx_.num_mag_dims()) {
                        case 3: {
                            T bx = uc.atom(ia).d_mtrx(xi1, xi2, 2);
                            T by = uc.atom(ia).d_mtrx(xi1, xi2, 3);

                            this->op_(0, this->packed_mtrx_offset_(ia) + idx, 2) = bx;
                            this->op_(1, this->packed_mtrx_offset_(ia) + idx, 2) = -by;

                            this->op_(0, this->packed_mtrx_offset_(ia) + idx, 3) = bx;
                            this->op_(1, this->packed_mtrx_offset_(ia) + idx, 3) = by;
                        }
                        /* fall through: the collinear part is needed as well */
                        case 1: {
                            T v  = uc.atom(ia).d_mtrx(xi1, xi2, 0);
                            T bz = uc.atom(ia).d_mtrx(xi1, xi2, 1);

                            /* add the ionic part on the diagonal */
                            if (lm1 == lm2) {
                                v += dion(idxrf1, idxrf2);
                            }

                            this->op_(0, this->packed_mtrx_offset_(ia) + idx, 0) = v + bz;
                            this->op_(0, this->packed_mtrx_offset_(ia) + idx, 1) = v - bz;
                            break;
                        }
                        case 0: {
                            this->op_(0, this->packed_mtrx_offset_(ia) + idx, 0) = uc.atom(ia).d_mtrx(xi1, xi2, 0);
                            /* add the ionic part on the diagonal */
                            if (lm1 == lm2) {
                                this->op_(0, this->packed_mtrx_offset_(ia) + idx, 0) += dion(idxrf1, idxrf2);
                            }
                            break;
                        }
                        default: {
                            RTE_THROW("wrong number of magnetic dimensions");
                        }
                    }
                }
            }
        }
    }
    if (env::print_checksum()) {
        auto cs = this->op_.checksum();
        print_checksum("D_operator", cs, this->ctx_.out());
    }

    if (this->pu_ == device_t::GPU && uc.max_mt_basis_size() != 0) {
        this->op_.allocate(memory_t::device).copy_to(memory_t::device);
    }

    /* with non-collinear magnetism the D-operator is not diagonal in the spin index */
    if (this->ctx_.num_mag_dims() == 3) {
        this->is_diag_ = false;
    }
}
template <typename T>
Q_operator<T>::Q_operator(Simulation_context const& ctx__)
    : Non_local_operator<T>(ctx__)
{
    /* Gamma-point calculations store only the real part of Q; otherwise (real, imag) pairs are kept */
    if (this->ctx_.gamma_point()) {
        this->op_ = mdarray<T, 3>(1, this->packed_mtrx_size_, this->ctx_.num_mag_dims() + 1);
    } else {
        this->op_ = mdarray<T, 3>(2, this->packed_mtrx_size_, this->ctx_.num_mag_dims() + 1);
    }
}
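
/* Note: without spin-orbit coupling the augmentation charge Q is spin-independent; it is nevertheless
   copied into every spin channel in initialize() below, so Q shares the same
   (component, packed index, spin) layout as the D-operator. */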
template <typename T>
void
Q_operator<T>::initialize()
{
    PROFILE("sirius::Q_operator::initialize");

    auto& uc = this->ctx_.unit_cell();

    #pragma omp parallel for
    for (int ia = 0; ia < uc.num_atoms(); ia++) {
        int iat = uc.atom(ia).type().id();
        if (!uc.atom_type(iat).augment()) {
            continue;
        }
        int nbf = uc.atom(ia).mt_basis_size();
        for (int xi2 = 0; xi2 < nbf; xi2++) {
            for (int xi1 = 0; xi1 < nbf; xi1++) {
                /* with spin-orbit coupling the augmentation charge acquires a spin-block structure */
                if (uc.atom_type(iat).spin_orbit_coupling()) {
                    for (auto si = 0; si < 2; si++) {
                        for (auto sj = 0; sj < 2; sj++) {
                            std::complex<T> result(0, 0);
                            for (int xi2p = 0; xi2p < nbf; xi2p++) {
                                if (uc.atom(ia).type().compare_index_beta_functions(xi2, xi2p)) {
                                    for (int xi1p = 0; xi1p < nbf; xi1p++) {
                                        if (uc.atom(ia).type().compare_index_beta_functions(xi1, xi1p)) {
                                            result += this->ctx_.augmentation_op(iat).q_mtrx(xi1p, xi2p) *
                                                      (uc.atom(ia).type().f_coefficients(xi1, xi1p, sj, 0) *
                                                       uc.atom(ia).type().f_coefficients(xi2p, xi2, 0, si) +
                                                       uc.atom(ia).type().f_coefficients(xi1, xi1p, sj, 1) *
                                                       uc.atom(ia).type().f_coefficients(xi2p, xi2, 1, si));
                                        }
                                    }
                                }
                            }

                            /* pack the (si, sj) spin block: diagonal blocks first, then off-diagonal */
                            const int ind = (si == sj) ? si : sj + 2;
                            this->op_(0, this->packed_mtrx_offset_(ia) + xi2 * nbf + xi1, ind) = result.real();
                            this->op_(1, this->packed_mtrx_offset_(ia) + xi2 * nbf + xi1, ind) = result.imag();
                        }
                    }
                } else {
                    for (int ispn = 0; ispn < this->ctx_.num_spins(); ispn++) {
                        this->op_(0, this->packed_mtrx_offset_(ia) + xi2 * nbf + xi1, ispn) =
                                this->ctx_.augmentation_op(iat).q_mtrx(xi1, xi2);
                    }
                }
            }
        }
    }

    if (env::print_checksum()) {
        auto cs = this->op_.checksum();
        print_checksum("Q_operator", cs, this->ctx_.out());
    }

    if (this->pu_ == device_t::GPU && uc.max_mt_basis_size() != 0) {
        this->op_.allocate(memory_t::device).copy_to(memory_t::device);
    }

    /* Q is null if no atom type carries an augmentation charge; it is not diagonal in spin
       if any atom type has spin-orbit coupling */
    this->is_null_ = true;
    for (int iat = 0; iat < uc.num_atom_types(); iat++) {
        if (uc.atom_type(iat).augment()) {
            this->is_null_ = false;
        }
        if (uc.atom_type(iat).spin_orbit_coupling()) {
            this->is_diag_ = false;
        }
    }
}
template <typename T>
int
Non_local_operator<T>::size(int i) const
{
    if (i == 0 || i == 1) {
        /* the operator matrix is square; both dimensions equal the total number of beta projectors */
        return size_;
    }
    RTE_THROW("invalid dimension");
}
template <typename T>
template <typename F>
sddk::matrix<F>
Non_local_operator<T>::get_matrix(int ispn, memory_t mem) const
{
    static_assert(is_complex<F>::value, "not implemented for gamma point");

    using double_complex = std::complex<double>;

    auto& uc = ctx_.unit_cell();
    std::vector<int> offsets(uc.num_atoms() + 1, 0);
    for (int ia = 0; ia < uc.num_atoms(); ++ia) {
        offsets[ia + 1] = offsets[ia] + uc.atom(ia).mt_basis_size();
    }

    /* the non-local operator is block-diagonal in the atom index */
    sddk::matrix<double_complex> O(this->size(0), this->size(1), mem);
    O.zero(mem);

    int num_atoms = uc.num_atoms();
    for (int ia = 0; ia < num_atoms; ++ia) {
        int offset = offsets[ia];
        int lsize  = offsets[ia + 1] - offsets[ia];
        if (mem == memory_t::device) {
            double_complex* out_ptr = O.at(memory_t::device, offset, offset);
            const double_complex* op_ptr =
                    reinterpret_cast<const double_complex*>(op_.at(memory_t::device, 0, packed_mtrx_offset_(ia), ispn));
            /* copy the packed nbf x nbf block column by column into the dense matrix */
            for (int col = 0; col < lsize; ++col) {
                acc::copy(out_ptr + col * O.ld(), op_ptr + col * lsize, lsize);
            }
        } else if (mem == memory_t::host) {
            double_complex* out_ptr = O.at(memory_t::host, offset, offset);
            const double_complex* op_ptr =
                    reinterpret_cast<const double_complex*>(op_.at(memory_t::host, 0, packed_mtrx_offset_(ia), ispn));
            for (int col = 0; col < lsize; ++col) {
                std::copy(op_ptr + col * lsize, op_ptr + col * lsize + lsize, out_ptr + col * O.ld());
            }
        } else {
            RTE_THROW("invalid memory type.");
        }
    }

    return O;
}
template sddk::matrix<std::complex<double>>
Non_local_operator<double>::get_matrix(int, memory_t) const;

template class Non_local_operator<double>;

template class D_operator<double>;

template class Q_operator<double>;

#if defined(SIRIUS_USE_FP32)
template class Non_local_operator<float>;

template class D_operator<float>;

template class Q_operator<float>;
#endif