SIRIUS 7.5.0
Electronic structure library and applications
sum_fg_fl_yg.hpp
// Copyright (c) 2013-2023 Anton Kozhevnikov, Thomas Schulthess
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
// the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
//    following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
//    and the following disclaimer in the documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

/** \file sum_fg_fl_yg.hpp
 *
 *  \brief LAPW specific function to compute sum over plane-wave coefficients and spherical harmonics.
 */

#ifndef __SUM_FG_FL_YG_HPP__
#define __SUM_FG_FL_YG_HPP__

namespace sirius {

/// Compute the sum over plane-wave coefficients and spherical harmonics that appears in the Poisson solver and in the
/// computation of the MT boundary values.
/** The following operation is performed:
 *  \f[
 *    q_{\ell m}^{\alpha} = \sum_{\bf G} 4\pi \rho({\bf G})
 *    e^{i{\bf G}{\bf r}_{\alpha}} i^{\ell} f_{\ell}^{\alpha}(G) Y_{\ell m}^{*}(\hat{\bf G})
 *  \f]
 */
inline auto
sum_fg_fl_yg(Simulation_context const& ctx__, int lmax__, std::complex<double> const* fpw__,
             sddk::mdarray<double, 3>& fl__, sddk::matrix<std::complex<double>>& gvec_ylm__)
{
    PROFILE("sirius::sum_fg_fl_yg");

    int ngv_loc = ctx__.gvec().count();

    int na_max{0};
    for (int iat = 0; iat < ctx__.unit_cell().num_atom_types(); iat++) {
        na_max = std::max(na_max, ctx__.unit_cell().atom_type(iat).num_atoms());
    }
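    /* na_max is the largest number of atoms of a single type; the intermediate buffers below are
       dimensioned with it so that one allocation can be reused for every atom type */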

    const int lmmax = sf::lmmax(lmax__);
    /* resulting matrix */
    sddk::mdarray<std::complex<double>, 2> flm(lmmax, ctx__.unit_cell().num_atoms());

    sddk::matrix<std::complex<double>> phase_factors;
    sddk::matrix<std::complex<double>> zm;
    sddk::matrix<std::complex<double>> tmp;

    switch (ctx__.processing_unit()) {
        case sddk::device_t::CPU: {
            auto& mp = get_memory_pool(sddk::memory_t::host);
            phase_factors = sddk::matrix<std::complex<double>>(ngv_loc, na_max, mp);
            zm = sddk::matrix<std::complex<double>>(lmmax, ngv_loc, mp);
            tmp = sddk::matrix<std::complex<double>>(lmmax, na_max, mp);
            break;
        }
        case sddk::device_t::GPU: {
            auto& mp = get_memory_pool(sddk::memory_t::host);
            auto& mpd = get_memory_pool(sddk::memory_t::device);
            phase_factors = sddk::matrix<std::complex<double>>(nullptr, ngv_loc, na_max);
            phase_factors.allocate(mpd);
            zm = sddk::matrix<std::complex<double>>(lmmax, ngv_loc, mp);
            zm.allocate(mpd);
            tmp = sddk::matrix<std::complex<double>>(lmmax, na_max, mp);
            tmp.allocate(mpd);
            break;
        }
    }

    std::vector<std::complex<double>> zil(lmax__ + 1);
    for (int l = 0; l <= lmax__; l++) {
        zil[l] = std::pow(std::complex<double>(0, 1), l);
    }
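    /* zil[l] = i^l is the l-dependent phase entering the expression for q_{lm}^{alpha} above */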

    for (int iat = 0; iat < ctx__.unit_cell().num_atom_types(); iat++) {
        const int na = ctx__.unit_cell().atom_type(iat).num_atoms();
        ctx__.generate_phase_factors(iat, phase_factors);
        PROFILE_START("sirius::sum_fg_fl_yg|zm");
        #pragma omp parallel for schedule(static)
        for (int igloc = 0; igloc < ngv_loc; igloc++) {
            for (int l = 0, lm = 0; l <= lmax__; l++) {
                std::complex<double> z = fourpi * fl__(l, igloc, iat) * zil[l] * fpw__[igloc];
                for (int m = -l; m <= l; m++, lm++) {
                    zm(lm, igloc) = z * std::conj(gvec_ylm__(lm, igloc));
                }
            }
        }
        PROFILE_STOP("sirius::sum_fg_fl_yg|zm");
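        /* the sum over G-vectors for all atoms of the current type is done as a single matrix-matrix
           multiplication: tmp(lm, i) = sum_G zm(lm, G) * phase_factors(G, i) */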
        PROFILE_START("sirius::sum_fg_fl_yg|mul");
        switch (ctx__.processing_unit()) {
            case sddk::device_t::CPU: {
                la::wrap(la::lib_t::blas)
                        .gemm('N', 'N', lmmax, na, ngv_loc, &la::constant<std::complex<double>>::one(),
                              zm.at(sddk::memory_t::host), zm.ld(), phase_factors.at(sddk::memory_t::host),
                              phase_factors.ld(), &la::constant<std::complex<double>>::zero(),
                              tmp.at(sddk::memory_t::host), tmp.ld());
                break;
            }
            case sddk::device_t::GPU: {
                zm.copy_to(sddk::memory_t::device);
                la::wrap(la::lib_t::gpublas)
                        .gemm('N', 'N', lmmax, na, ngv_loc, &la::constant<std::complex<double>>::one(),
                              zm.at(sddk::memory_t::device), zm.ld(), phase_factors.at(sddk::memory_t::device),
                              phase_factors.ld(), &la::constant<std::complex<double>>::zero(),
                              tmp.at(sddk::memory_t::device), tmp.ld());
                tmp.copy_to(sddk::memory_t::host);
                break;
            }
        }
        PROFILE_STOP("sirius::sum_fg_fl_yg|mul");

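        /* scatter the (lm)-resolved result for each atom of this type into the output array,
           addressed by the global atom index */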
        for (int i = 0; i < na; i++) {
            const int ia = ctx__.unit_cell().atom_type(iat).atom_id(i);
            std::copy(&tmp(0, i), &tmp(0, i) + lmmax, &flm(0, ia));
        }
    }

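    /* each rank has summed only over its local set of G-vectors; accumulate the partial results
       over the full communicator */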
    ctx__.comm().allreduce(&flm(0, 0), (int)flm.size());

    return flm;
}

} // namespace sirius

#endif
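For reference, the sketch below evaluates the same quantity with a naive loop over atoms, G-vectors and (l, m) channels, directly following the formula for q_{lm}^{alpha} above. It is an illustration only, not part of SIRIUS: it omits the GEMM batching, the memory pools and the MPI/GPU handling of the routine above, it indexes the radial integrals f_l per atom rather than per atom type, and all names (sum_fg_fl_yg_reference, gvec_cart, atom_pos_cart, rho_pw, fl, ylm) are hypothetical stand-ins for the corresponding SIRIUS objects.

#include <array>
#include <cmath>
#include <complex>
#include <vector>

/* Naive reference evaluation of
 *   q_{lm}^{alpha} = sum_G 4*pi * rho(G) * exp(i G . r_alpha) * i^l * f_l^alpha(G) * conj(Y_lm(G_hat))
 * using plain containers; lm is the composite index lm = l * l + l + m. */
std::vector<std::vector<std::complex<double>>>
sum_fg_fl_yg_reference(int lmax, int num_atoms, int num_gvec,
                       std::vector<std::array<double, 3>> const& gvec_cart,      /* Cartesian G-vectors */
                       std::vector<std::array<double, 3>> const& atom_pos_cart,  /* Cartesian atom positions */
                       std::vector<std::complex<double>> const& rho_pw,          /* rho(G) */
                       std::vector<std::vector<std::vector<double>>> const& fl,  /* fl[ia][l][ig] */
                       std::vector<std::vector<std::complex<double>>> const& ylm /* ylm[lm][ig] = Y_lm(G_hat) */)
{
    double const fourpi = 16.0 * std::atan(1.0);
    int const lmmax = (lmax + 1) * (lmax + 1);

    std::vector<std::vector<std::complex<double>>> q(num_atoms,
            std::vector<std::complex<double>>(lmmax, 0.0));

    for (int ia = 0; ia < num_atoms; ia++) {
        for (int ig = 0; ig < num_gvec; ig++) {
            /* structure factor exp(i G . r_alpha) */
            double gr = gvec_cart[ig][0] * atom_pos_cart[ia][0] +
                        gvec_cart[ig][1] * atom_pos_cart[ia][1] +
                        gvec_cart[ig][2] * atom_pos_cart[ia][2];
            std::complex<double> phase(std::cos(gr), std::sin(gr));
            for (int l = 0, lm = 0; l <= lmax; l++) {
                /* common prefactor for all m of this l: 4*pi * rho(G) * exp(i G . r_alpha) * i^l * f_l(G) */
                std::complex<double> il = std::pow(std::complex<double>(0.0, 1.0), l);
                std::complex<double> z = fourpi * rho_pw[ig] * phase * il * fl[ia][l][ig];
                for (int m = -l; m <= l; m++, lm++) {
                    q[ia][lm] += z * std::conj(ylm[lm][ig]);
                }
            }
        }
    }
    return q;
}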