34 PROFILE(
"sirius::Augmentation_operator::generate_pw_coeffs");
38 auto const& tp = gvec_.gvec_tp();
41 int lmax_beta = atom_type_.
indexr().lmax();
47 int nqlm = nbf * (nbf + 1) / 2;
49 int gvec_count = gvec_.
count();
57 switch (atom_type_.parameters().processing_unit()) {
58 case sddk::device_t::CPU:
59 case sddk::device_t::GPU: {
62 #pragma omp parallel for
63 for (
int igloc = 0; igloc < gvec_count; igloc++) {
64 std::vector<double> rlm(
lmmax);
66 std::vector<std::complex<double>> v(
lmmax);
67 for (
int idx12 = 0; idx12 < nqlm; idx12++) {
68 int lm1 = idx_(0, idx12);
69 int lm2 = idx_(1, idx12);
70 int idxrf12 = idx_(2, idx12);
71 for (
int lm3 = 0; lm3 <
lmmax; lm3++) {
72 v[lm3] =
std::conj(zilm_[lm3]) * rlm[lm3] *
73 ri_values_(idxrf12, l_by_lm_[lm3], gvec_.gvec_shell_idx_local(igloc));
75 std::complex<double> z = fourpi_omega * gaunt_coefs.
sum_L3_gaunt(lm2, lm1, &v[0]);
76 q_pw_(idx12, 2 * igloc) = z.real();
77 q_pw_(idx12, 2 * igloc + 1) = z.imag();
109 if (gvec_.comm().
rank() == 0) {
110 for (
int xi2 = 0; xi2 < nbf; xi2++) {
111 for (
int xi1 = 0; xi1 <= xi2; xi1++) {
114 q_mtrx_(xi1, xi2) = q_mtrx_(xi2, xi1) = gvec_.
omega() * q_pw_(idx12, 0);
119 gvec_.comm().
bcast(&q_mtrx_(0, 0), nbf * nbf, 0);
121 if (env::print_checksum()) {
125 if (gvec_.comm().
rank() == 0) {
126 print_checksum(
"q_pw", cs, std::cout);
127 print_checksum(
"q_mtrx", cs1, std::cout);
137 PROFILE(
"sirius::Augmentation_operator::generate_pw_coeffs_gvec_deriv");
139 auto const& tp = gvec_.gvec_tp();
142 int lmax_beta = atom_type_.
indexr().lmax();
148 int gvec_count = gvec_.
count();
150 switch (atom_type_.parameters().processing_unit()) {
151 case sddk::device_t::GPU:
152 case sddk::device_t::CPU: {
155 #pragma omp parallel for
156 for (
int igloc = 0; igloc < gvec_count; igloc++) {
158 int igsh = gvec_.gvec_shell_idx_local(igloc);
161 double gvc_nu = gvc[nu__];
163 std::vector<double> rlm(
lmmax);
166 const bool divide_by_r{
false};
167 sf::dRlm_dr(2 * lmax_beta, gvc, rlm_dq, divide_by_r);
169 std::vector<std::complex<double>> v(
lmmax);
170 for (
int idx12 = 0; idx12 < nbf * (nbf + 1) / 2; idx12++) {
171 int lm1 = idx_(0, idx12);
172 int lm2 = idx_(1, idx12);
173 int idxrf12 = idx_(2, idx12);
174 for (
int lm3 = 0; lm3 <
lmmax; lm3++) {
175 int l = l_by_lm_[lm3];
176 v[lm3] =
std::conj(zilm_[lm3]) * (rlm_dq(lm3, nu__) * ri_values_(idxrf12, l, igsh) +
177 rlm[lm3] * ri_dq_values_(idxrf12, l, igsh) * gvc_nu);
180 q_pw_(idx12, 2 * igloc) = z.real();
181 q_pw_(idx12, 2 * igloc + 1) = z.imag();
Contains the implementation of the sirius::Augmentation_operator class.
int mt_basis_size() const
Total number of muffin-tin basis functions (APW + LO).
auto const & indexr() const
Return const reference to the index of radial functions.
bool augment() const
Return true if this atom type has an augmentation charge.
void generate_pw_coeffs()
Generate Q_{xi,xi'}(G) plane wave coefficients.
void generate_pw_coeffs_gvec_deriv(int nu__)
Generate the G-vector derivative dQ_{xi,xi'}(G)/dG of the plane-wave coefficients.
Compact storage of non-zero Gaunt coefficients.
auto sum_L3_gaunt(int lm1, int lm2, std::complex< double > const *v) const
Return a sum over L3 (lm3) index of Gaunt coefficients and a complex vector.
static double gaunt_rrr(int l1, int l2, int l3, int m1, int m2, int m3)
Gaunt coefficient of three real spherical harmonics.
int count() const
Number of G-vectors for a fine-grained distribution for the current MPI rank.
double omega() const
Return the volume of the real space unit cell that corresponds to the reciprocal lattice of G-vectors...
r3::vector< double > gvec_cart(int ig__) const
Return G vector in Cartesian coordinates.
void bcast(T *buffer__, int count__, int root__) const
Perform buffer broadcast.
void allreduce(T *buffer__, int count__) const
Perform the in-place (the output buffer is used as the input buffer) all-to-all reduction.
int rank() const
Rank of MPI process inside communicator.
void zero(memory_t mem__, size_t idx0__, size_t n__)
Zero n elements starting from idx0.
T checksum(size_t idx0__, size_t size__) const
Compute checksum.
int lmmax(int lmax)
Maximum number of $(\ell, m)$ combinations for a given $\ell_{max}$.
void dRlm_dr(int lmax__, r3::vector< double > &r__, sddk::mdarray< double, 2 > &data__, bool divide_by_r__=true)
Compute the derivatives of real spherical harmonics over the components of cartesian vector.
void spherical_harmonics(int lmax, double theta, double phi, std::complex< double > *ylm)
Optimized implementation of complex spherical harmonics.
Namespace of the SIRIUS library.
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
int packed_index(int i__, int j__)
Pack two indices into one for symmetric matrices.