31double density_residual_hartree_energy(Density
const& rho1__, Density
const& rho2__)
34 auto const& gv = rho1__.ctx().gvec();
35 #pragma omp parallel for reduction(+:eh)
36 for (
int igloc = gv.skip_g0(); igloc < gv.count(); igloc++) {
37 auto z = rho1__.component(0).rg().f_pw_local(igloc) - rho2__.component(0).rg().f_pw_local(igloc);
39 eh += (std::pow(z.real(), 2) + std::pow(z.imag(), 2)) / std::pow(g, 2);
41 gv.comm().allreduce(&eh, 1);
42 eh *=
twopi * rho1__.ctx().unit_cell().omega();
51 std::complex<double>* rho_pw__)
53 PROFILE(
"sirius::Potential::poisson_add_pseudo_pw");
55 int lmmax = ctx_.lmmax_rho();
56 int ngv = ctx_.
gvec().count();
73 switch (ctx_.processing_unit()) {
74 case sddk::device_t::CPU: {
75 auto& mp = get_memory_pool(sddk::memory_t::host);
81 case sddk::device_t::GPU: {
82 auto& mp = get_memory_pool(sddk::memory_t::host);
83 auto& mpd = get_memory_pool(sddk::memory_t::device);
101 for (
int lm = 0;
lm < ctx_.lmmax_rho();
lm++) {
102 qa(
lm, i) = qmt__(
lm, ia) - qit__(
lm, ia);
106 switch (ctx_.processing_unit()) {
107 case sddk::device_t::CPU: {
110 qa.at(sddk::memory_t::host), qa.
ld(), pf.at(sddk::memory_t::host), pf.
ld(),
111 &
la::constant<std::complex<double>>::zero(), qapf.at(sddk::memory_t::host), qapf.
ld());
114 case sddk::device_t::GPU: {
115 qa.
copy_to(sddk::memory_t::device);
118 qa.at(sddk::memory_t::device), qa.
ld(), pf.at(sddk::memory_t::device), pf.
ld(),
119 &
la::constant<std::complex<double>>::zero(), qapf.at(sddk::memory_t::device), qapf.
ld());
120 qapf.
copy_to(sddk::memory_t::host);
129 #pragma omp parallel for schedule(static)
130 for (
int igloc = ctx_.
gvec().skip_g0(); igloc < ctx_.
gvec().count(); igloc++) {
132 double gRn = std::pow(2.0 / gR, pseudo_density_order_ + 1);
134 std::complex<double> rho_G(0, 0);
136 for (
int l = 0,
lm = 0; l <= ctx_.lmax_rho(); l++) {
137 std::complex<double> zt1(0, 0);
138 for (
int m = -l; m <= l; m++,
lm++) {
139 zt1 += gvec_ylm_(
lm, igloc) * qapf(
lm, igloc);
141 rho_G += fourpi_omega *
std::conj(zil_[l]) * zt1 * gamma_factors_R_(l, iat) *
142 sbessel_mt_(l + pseudo_density_order_ + 1, igloc, iat) * gRn;
144 rho_pw__[igloc] += rho_G;
148 std::complex<double> rho_G(0, 0);
151 rho_G += fourpi_omega *
y00 * (qmt__(0, ia) - qit__(0, ia));
153 rho_pw__[0] += rho_G;
160 PROFILE(
"sirius::Potential::poisson");
163 if (ctx_.full_potential()) {
168 if (env::print_checksum()) {
169 print_checksum(
"qmt", qmt.checksum(), ctx_.
out());
175 if (env::print_checksum()) {
176 print_checksum(
"qit", qit.checksum(), ctx_.
out());
182 if (ctx_.cfg().control().verification() >= 2) {
187 for (
int lm = 0;
lm < ctx_.lmmax_rho();
lm++) {
188 d += std::abs(qmt(
lm, ia) - qit(
lm, ia));
191 if (ctx_.verbosity() >= 1) {
192 RTE_OUT(ctx_.
out()) <<
"pseudocharge error: " << d << std::endl;
198 if (ctx_.
gvec().comm().rank() == 0) {
201 if (!ctx_.molecule()) {
202 #pragma omp parallel for
203 for (
int igloc = ctx_.
gvec().skip_g0(); igloc < ctx_.
gvec().count(); igloc++) {
216 #pragma omp parallel for
217 for (
int igloc = ctx_.
gvec().skip_g0(); igloc < ctx_.
gvec().count(); igloc++) {
220 (1.0 - std::cos(glen * R_cut));
232 if (ctx_.full_potential()) {
237 PROFILE(
"sirius::Potential::poisson|bc");
243 #pragma omp parallel for default(shared)
244 for (
int l = 0; l <= ctx_.lmax_pot(); l++) {
245 for (
int ir = 0; ir < nmtp; ir++) {
254 std::vector<double> vlm(ctx_.lmmax_pot());
255 SHT::convert(ctx_.lmax_pot(), &vmtlm(0, it.i), &vlm[0]);
257 #pragma omp parallel for default(shared)
258 for (
int lm = 0;
lm < ctx_.lmmax_pot();
lm++) {
259 int l = l_by_lm_[
lm];
261 for (
int ir = 0; ir < nmtp; ir++) {
280 if (env::print_checksum()) {
283 print_checksum(
"vha_rg", cs, ctx_.
out());
284 print_checksum(
"vha_pw", cs1, ctx_.
out());
288 energy_vha_ = sirius::inner(rho, hartree_potential());
292 if (ctx_.full_potential()) {
297 for (
int ir = 0; ir < atom.num_mt_points(); ir++) {
298 double r = atom.radial_grid(ir);
300 srho(ir) = rho.
mt()[it.i](0, ir) * r;
305 energy_vha_ += evha_nuc;
int atom_id(int idx) const
Return atom ID (global index) by the index of atom within a given type.
int num_mt_points() const
Return number of muffin-tin radial grid points.
double mt_radius() const
Return muffin-tin radius.
int num_atoms() const
Return number of atoms of a given type.
int type_id() const
Return atom type id.
Representation of the periodical function on the muffin-tin geometry.
auto & mt()
Return reference to spherical functions component.
auto & rg()
Return reference to regular space grid component.
void poisson(Periodic_function< double > const &rho)
Poisson solver.
std::unique_ptr< Periodic_function< double > > hartree_potential_
Hartree potential.
Unit_cell & unit_cell_
Alias to unit cell.
sddk::mdarray< double, 1 > vh_el_
Electronic part of Hartree potential.
auto poisson_vmt(Periodic_function< double > const &rho__) const
Compute MT part of the potential and MT multipole moments.
sddk::mdarray< double, 3 > sbessel_mom_
Moments of the spherical Bessel functions.
void poisson_add_pseudo_pw(sddk::mdarray< std::complex< double >, 2 > &qmt__, sddk::mdarray< std::complex< double >, 2 > &qit__, std::complex< double > *rho_pw__)
Add contribution from the pseudocharge to the plane-wave expansion.
static void convert(int lmax__, double const *f_rlm__, std::complex< double > *f_ylm__)
Convert form Rlm to Ylm representation.
auto const & gvec() const
Return const reference to Gvec object.
void generate_phase_factors(int iat__, sddk::mdarray< std::complex< double >, 2 > &phase_factors__) const
Generate phase factors for all atoms of a given type.
std::ostream & out() const
Return output stream.
mpi::Communicator const & comm() const
Total communicator of the simulation.
Spline< T, U > & interpolate()
double omega() const
Unit cell volume.
int max_num_mt_points() const
Maximum number of muffin-tin points among all atom types.
Atom const & atom(int id__) const
Return const atom instance by id.
int num_atom_types() const
Number of atom types.
Atom_type & atom_type(int id__)
Return atom type instance by id.
int num_atoms() const
Number of atoms in the unit cell.
void gemm(char transa, char transb, ftn_int m, ftn_int n, ftn_int k, T const *alpha, T const *A, ftn_int lda, T const *B, ftn_int ldb, T const *beta, T *C, ftn_int ldc, acc::stream_id sid=acc::stream_id(-1)) const
General matrix-matrix multiplication.
void allgather(T *buffer__, int const *recvcounts__, int const *displs__) const
In-place MPI_Allgatherv.
void allreduce(T *buffer__, int count__) const
Perform the in-place (the output buffer is used as the input buffer) all-to-all reduction.
int rank() const
Rank of MPI process inside communicator.
Multidimensional array with the column-major (Fortran) order.
void copy_to(memory_t mem__, size_t idx0__, size_t n__, acc::stream_id sid=acc::stream_id(-1))
Copy n elements starting from idx0 from one memory type to another.
uint32_t ld() const
Return leading dimension size.
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
@ gpublas
GPU BLAS (cuBlas or ROCblas)
int lmmax(int lmax)
Maximum number of combinations for a given .
int lm(int l, int m)
Get composite lm index by angular index l and azimuthal index m.
Namespace of the SIRIUS library.
const double y00
First spherical harmonic .
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
auto sum_fg_fl_yg(Simulation_context const &ctx__, int lmax__, std::complex< double > const *fpw__, sddk::mdarray< double, 3 > &fl__, sddk::matrix< std::complex< double > > &gvec_ylm__)
Contains declaration and partial implementation of sirius::Potential class.
LAPW specific function to compute sum over plane-wave coefficients and spherical harmonics.