SIRIUS 7.5.0
Electronic structure library and applications
ultrasoft_precond_k.hpp
// Copyright (c) 2023 Simon Pintarelli, Anton Kozhevnikov, Thomas Schulthess
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
// the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
// following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
// and the following disclaimer in the documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

/** \file ultrasoft_precond_k.hpp
 *
 * \brief Provides the preconditioner for the ultrasoft pseudopotential case.
 */

#ifndef __ULTRASOFT_PRECOND_K_HPP__
#define __ULTRASOFT_PRECOND_K_HPP__

#include "SDDK/memory.hpp"
#include "core/fft/gvec.hpp"
#include "diag_mm.hpp"
// headers providing Simulation_context, Beta_projectors_base, Q_operator and the la:: wrappers used
// below; the include paths are assumed to follow the SIRIUS source layout
#include "context/simulation_context.hpp"
#include "beta_projectors/beta_projectors_base.hpp"
#include "hamiltonian/non_local_operator.hpp"
#include "core/la/linalg.hpp"

namespace sirius {

namespace local {

class OperatorBase
{
  public:
    OperatorBase(int n)
        : n(n){};
    OperatorBase() = delete;

    int size() const
    {
        return n;
    }

  private:
    int n;
};

} // namespace local

template <class numeric_t>
class DiagonalPreconditioner
{
  public:
    DiagonalPreconditioner(Simulation_context& ctx)
        : ctx_(ctx)
    {
    }
    sddk::mdarray<numeric_t, 2> apply(const sddk::mdarray<numeric_t, 2>& X, sddk::memory_t pm);
    void apply(sddk::mdarray<numeric_t, 2>& Y, const sddk::mdarray<numeric_t, 2>& X, sddk::memory_t pm);

  protected:
    sddk::mdarray<numeric_t, 1> d_;
    Simulation_context& ctx_;
};
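
/* The two apply() overloads implemented below compute Y = diag(d_) * X element-wise,
 * i.e. Y(i, j) = d_(i) * X(i, j); a derived class only needs to fill the diagonal d_
 * (see Teter below). */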

template <class numeric_t>
inline sddk::mdarray<numeric_t, 2>
DiagonalPreconditioner<numeric_t>::apply(const sddk::mdarray<numeric_t, 2>& X, sddk::memory_t pm)
{
    auto Y = empty_like(X, sddk::get_memory_pool(pm));
    this->apply(Y, X, pm);
    return Y;
}

/// computes Y <- P*X, with P = diag(d_)
template <class numeric_t>
inline void
DiagonalPreconditioner<numeric_t>::apply(sddk::mdarray<numeric_t, 2>& Y, const sddk::mdarray<numeric_t, 2>& X,
                                         sddk::memory_t pm)
{
#ifdef SIRIUS_GPU
    // copy d_ to the GPU
    if (sddk::is_device_memory(pm)) {
        d_.allocate(sddk::memory_t::device);
        d_.copy_to(sddk::memory_t::device);
        int n = d_.size(0);
        zdiagmm(d_.at(sddk::memory_t::device), n, X.at(sddk::memory_t::device), X.ld(), X.size(1),
                Y.at(sddk::memory_t::device), Y.ld(), std::complex<double>{1});
        return;
    }
#endif /*SIRIUS_GPU*/

    int n = X.size(0);
    #pragma omp parallel for
    for (int i = 0; i < n; ++i) {
        numeric_t d = d_(i);
        for (int j = 0; j < static_cast<int>(X.size(1)); ++j) {
            Y(i, j) = d * X(i, j);
        }
    }
    return;
}

/** Payne, M. C., Teter, M. P., Allan, D. C., Arias, T. A., & Joannopoulos, J. D.,
 * Iterative minimization techniques for ab initio total-energy calculations:
 * molecular dynamics and conjugate gradients.
 * Rev. Mod. Phys., 64(4), 1045–1097 (1992).
 * https://dx.doi.org/10.1103/RevModPhys.64.1045
 */
template <class numeric_t>
class Teter : public DiagonalPreconditioner<numeric_t>, public local::OperatorBase
{
  public:
    Teter(Simulation_context& ctx, const fft::Gvec& gkvec)
        : DiagonalPreconditioner<numeric_t>(ctx)
        , local::OperatorBase(gkvec.count())
    {
        this->d_ = sddk::mdarray<numeric_t, 1>(gkvec.count());
        for (int i = 0; i < gkvec.count(); ++i) {
            // Teter damping formula
            double T = gkvec.gkvec_cart<index_domain_t::global>(i).length2();
            double T2 = T * T;
            double T3 = T2 * T;
            double T4 = T2 * T2;
            double Tp = 16 * T4 / (27 + 18 * T + 12 * T2 + 8 * T3);
            // Eq. (5.16) in Payne et al.
            this->d_(i) = 1 / (1 + Tp);
        }
    }

    using DiagonalPreconditioner<numeric_t>::apply;
};
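
/* Written out, the diagonal entries set above are
 *
 *   d(G+k) = 1 / (1 + x),   x = 16 T⁴ / (27 + 18 T + 12 T² + 8 T³),   T = |G+k|²,
 *
 * cf. Eq. (5.16) of Payne et al.; d acts on wave-function coefficients through the inherited
 * apply() overloads as Y(i, j) = d(i) * X(i, j). */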

/** Ultrasoft preconditioner for direct minimization.
 *
 * (1+T)⁻¹ + G R G⊹
 * where R = -Q (1 + C Q)⁻¹,
 * G are the "preconditioned" beta projectors G = K B with the diagonal (Teter) kinetic-energy
 * preconditioner K = (1+T)⁻¹, and C = B⊹ K B (computed as <ϐ|P|ϐ> in the constructor).
 *
 * Hasnip, P. J., & Pickard, C. J. (2006). Electronic energy minimisation with
 * ultrasoft pseudopotentials. Computer Physics Communications, 174(1), 24–29.
 * http://dx.doi.org/10.1016/j.cpc.2005.07.011
 */
template <class numeric_t>
class Ultrasoft_preconditioner : public local::OperatorBase
{
  public:
    Ultrasoft_preconditioner(Simulation_context& simulation_context, const Q_operator<double>& q_op, int ispn,
                             const Beta_projectors_base<double>& bp, const fft::Gvec& gkvec);

    sddk::mdarray<numeric_t, 2> apply(const sddk::mdarray<numeric_t, 2>& X, sddk::memory_t pm = sddk::memory_t::none);
    void apply(sddk::mdarray<numeric_t, 2>& Y, const sddk::mdarray<numeric_t, 2>& X,
               sddk::memory_t pm = sddk::memory_t::none);

    const Simulation_context& ctx() const
    {
        return ctx_;
    }

  private:
    // cannot be const, because the memory pool is used
    Simulation_context& ctx_;
    Teter<numeric_t> P;
    const Q_operator<double>& q_op;
    int ispn_;
    const Beta_projectors_base<double>& bp_;
    sddk::mdarray<numeric_t, 2> LU_;
    sddk::mdarray<int, 1> ipiv_;
};
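
/* A minimal usage sketch (hypothetical variable names; assumes an initialized Simulation_context `ctx`,
 * the Q-operator `q_op` and beta projectors `bp` of an ultrasoft pseudopotential calculation, and the
 * G+k vectors `gkvec` of a single k-point):
 * \code
 *   Ultrasoft_preconditioner<std::complex<double>> prec(ctx, q_op, 0, bp, gkvec);
 *   // X: num_gkvec x num_bands wave-function coefficients, allocated in host memory
 *   auto PX = prec.apply(X, sddk::memory_t::host);
 * \endcode
 */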

template <class numeric_t>
Ultrasoft_preconditioner<numeric_t>::Ultrasoft_preconditioner(Simulation_context& simulation_context,
                                                              const Q_operator<double>& q_op, int ispn,
                                                              const Beta_projectors_base<double>& bp,
                                                              const fft::Gvec& gkvec)
    : local::OperatorBase(gkvec.count())
    , ctx_(simulation_context)
    , P(simulation_context, gkvec)
    , q_op(q_op)
    , ispn_(ispn)
    , bp_(bp)
{
    using complex_t = std::complex<double>;
    /* compute C <- <ϐ|P|ϐ> */
    auto C = inner_beta(bp, simulation_context, [&simulation_context = this->ctx_, &P = this->P](auto& Y) {
        return P.apply(Y, simulation_context.processing_unit_memory_t());
    });

    sddk::matrix<numeric_t> CQ(C.size(0), q_op.size(1), sddk::memory_t::host);
    if (is_device_memory(ctx_.processing_unit_memory_t())) {
        C.allocate(sddk::memory_t::host);
        C.copy_to(sddk::memory_t::host);
    }
    /* compute CQ <- C*Q */
    this->q_op.lmatmul(CQ, C, this->ispn_, sddk::memory_t::host);
    /* compute CQ <- 1 + CQ */
    int n = CQ.size(0);
    // vector of ones, used to increment the diagonal of CQ
    std::vector<complex_t> ones(n, 1);
    // add the identity matrix (axpy with stride n + 1 touches the diagonal only)
    la::wrap(la::lib_t::blas)
            .axpy(n, &la::constant<complex_t>::one(), ones.data(), 1, CQ.at(sddk::memory_t::host), n + 1);
    // keep a copy of 1 + C*Q that will be overwritten by its LU factorization
    this->LU_ = sddk::empty_like(CQ);
    sddk::auto_copy(this->LU_, CQ);
    this->ipiv_ = sddk::mdarray<int, 1>(n, sddk::memory_t::host);
    // compute the LU factorization
    la::wrap(la::lib_t::lapack)
            .getrf(n, n, this->LU_.at(sddk::memory_t::host), this->LU_.ld(), this->ipiv_.at(sddk::memory_t::host));
    // copy the LU factorization to the device if needed
    auto mem = ctx_.processing_unit_memory_t();
    if (is_device_memory(mem)) {
        ipiv_.allocate(mem);
        ipiv_.copy_to(mem);

        LU_.allocate(mem);
        LU_.copy_to(mem);
    }
}
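
/* At this point LU_ and ipiv_ hold the LU factorization of (1 + C Q) with C = <ϐ|K|ϐ>; apply() below
 * uses it (via getrs) to form R = -Q (1 + C Q)⁻¹ acting on the projected wave functions. */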

template <class numeric_t>
sddk::mdarray<numeric_t, 2>
Ultrasoft_preconditioner<numeric_t>::apply(const sddk::mdarray<numeric_t, 2>& X, sddk::memory_t pm)
{
    auto Y = empty_like(X, sddk::get_memory_pool(pm == sddk::memory_t::none ? ctx_.processing_unit_memory_t() : pm));
    this->apply(Y, X, pm);
    return Y;
}

template <class numeric_t>
void
Ultrasoft_preconditioner<numeric_t>::apply(sddk::mdarray<numeric_t, 2>& Y, const sddk::mdarray<numeric_t, 2>& X,
                                           sddk::memory_t pm)
{
    int num_beta = bp_.num_total_beta();
    int nbnd = X.size(1);

    pm = (pm == sddk::memory_t::none) ? ctx_.processing_unit_memory_t() : pm;
    sddk::device_t pu = is_host_memory(pm) ? sddk::device_t::CPU : sddk::device_t::GPU;

    la::lib_t la{la::lib_t::blas};
    if (sddk::is_device_memory(pm)) {
        la = la::lib_t::gpublas;
    }

    auto bp_gen = bp_.make_generator(pu);
    auto beta_coeffs = bp_gen.prepare();

    sddk::mdarray<numeric_t, 2> bphi(num_beta, nbnd, sddk::get_memory_pool(pm));
    // compute the inner product bphi <- G^H X (G = preconditioned beta projectors), chunk by chunk
    for (int ichunk = 0; ichunk < bp_.num_chunks(); ++ichunk) {
        bp_gen.generate(beta_coeffs, ichunk);
        // apply preconditioner to beta projectors
        auto G = P.apply(beta_coeffs.pw_coeffs_a_, pm);
        int row_offset = beta_coeffs.beta_chunk_.offset_;

        la::wrap(la).gemm('C', 'N', G.size(1), nbnd, G.size(0), &la::constant<numeric_t>::one(), G.at(pm), G.ld(),
                          X.at(pm), X.ld(), &la::constant<numeric_t>::zero(), bphi.at(pm, row_offset, 0), bphi.ld());
    }
    assert(num_beta == static_cast<int>(bphi.size(0)) && nbnd == static_cast<int>(bphi.size(1)));

    // solve (1 + C Q) * bphi = G^H X in place, using the LU factorization from the constructor
    la::lib_t lapack{la::lib_t::lapack};
    if (pu == sddk::device_t::GPU) {
        lapack = la::lib_t::gpublas; // assumed GPU backend for getrs on device memory
    }
    la::wrap(lapack).getrs('N', num_beta, nbnd, LU_.at(pm), LU_.ld(), ipiv_.at(pm), bphi.at(pm), bphi.ld());

    auto R = empty_like(bphi, sddk::get_memory_pool(pm));
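    // bphi now holds (1 + C Q)⁻¹ G^H X; the rmatmul below computes R <- -Q * bphi = -Q (1 + C Q)⁻¹ G^H X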
    q_op.rmatmul(R, bphi, ispn_, pm, -1);

    // compute Y <- (1+T')^(-1) X
    this->P.apply(Y, X, pm);

    // accumulate Y <- Y + G * R over the beta-projector chunks
    for (int ichunk = 0; ichunk < bp_.num_chunks(); ++ichunk) {
        bp_gen.generate(beta_coeffs, ichunk);
        // apply preconditioner to the beta projectors of this chunk
        auto G = P.apply(beta_coeffs.pw_coeffs_a_, pm);
        int m = Y.size(0);
        int n = Y.size(1);
        int k = beta_coeffs.pw_coeffs_a_.size(1);

        switch (pu) {
            case sddk::device_t::CPU: {
                la::wrap(la::lib_t::blas)
                        .gemm('N', 'N', m, n, k, &la::constant<numeric_t>::one(), G.at(sddk::memory_t::host), G.ld(),
                              R.at(sddk::memory_t::host, beta_coeffs.beta_chunk_.offset_, 0), R.ld(),
                              &la::constant<numeric_t>::one(), Y.at(sddk::memory_t::host), Y.ld());
                break;
            }
#ifdef SIRIUS_GPU
            case sddk::device_t::GPU:
                la::wrap(la::lib_t::gpublas)
                        .gemm('N', 'N', m, n, k, &la::constant<numeric_t>::one(), G.at(sddk::memory_t::device), G.ld(),
                              R.at(sddk::memory_t::device, beta_coeffs.beta_chunk_.offset_, 0), R.ld(),
                              &la::constant<numeric_t>::one(), Y.at(sddk::memory_t::device), Y.ld());
                break;
#endif
            default:
                RTE_THROW("invalid processing unit");
                break;
        }
    }
}
} // namespace sirius

#endif /* __ULTRASOFT_PRECOND_K_HPP__ */