SIRIUS 7.5.0
Electronic structure library and applications
beta_projectors_base.cpp
Go to the documentation of this file.
1// Copyright (c) 2013-2022 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file beta_projectors_base.cpp
21 *
22 * \brief Contains implementation of beta-projectors generator.
23 */
24
25#include <stdexcept>
28#include "core/profiler.hpp"
29#include "core/env/env.hpp"
31
32namespace sirius {
33
34#if defined(SIRIUS_GPU)
35void
36create_beta_gk_gpu(int num_atoms, int num_gkvec, int const* beta_desc, std::complex<float> const* beta_gk_t,
37 double const* gkvec, double const* atom_pos, std::complex<float>* beta_gk)
38{
39 create_beta_gk_gpu_float(num_atoms, num_gkvec, beta_desc, beta_gk_t, gkvec, atom_pos, beta_gk);
40}
41
42void
43create_beta_gk_gpu(int num_atoms, int num_gkvec, int const* beta_desc, std::complex<double> const* beta_gk_t,
44 double const* gkvec, double const* atom_pos, std::complex<double>* beta_gk)
45{
46 create_beta_gk_gpu_double(num_atoms, num_gkvec, beta_desc, beta_gk_t, gkvec, atom_pos, beta_gk);
47}
48#endif
49
50/// Internal implementation of beta-projectors generator.
51namespace local {
52
53template <class T>
54void
55beta_projectors_generate_cpu(sddk::matrix<std::complex<T>>& pw_coeffs_a,
56 sddk::mdarray<std::complex<T>, 3> const& pw_coeffs_t, int ichunk__, int j__, beta_chunk_t const& beta_chunk,
57 Simulation_context const& ctx, fft::Gvec const& gkvec)
58{
59 PROFILE("beta_projectors_generate_cpu");
60
61 using numeric_t = std::complex<T>;
62 using double_complex = std::complex<double>;
63
64 int num_gkvec_loc = gkvec.count();
65 auto& unit_cell = ctx.unit_cell();
66
67 #pragma omp parallel for
68 for (int i = 0; i < beta_chunk.num_atoms_; i++) {
69 int ia = beta_chunk.desc_(static_cast<int>(beta_desc_idx::ia), i);
70
71 double phase = twopi * dot(gkvec.vk(), unit_cell.atom(ia).position());
72 double_complex phase_k = std::exp(double_complex(0.0, phase));
73
74 std::vector<double_complex> phase_gk(num_gkvec_loc);
75 for (int igk_loc = 0; igk_loc < num_gkvec_loc; igk_loc++) {
76 auto G = gkvec.gvec<index_domain_t::local>(igk_loc);
77 /* total phase e^{-i(G+k)r_{\alpha}} */
78 phase_gk[igk_loc] = std::conj(ctx.gvec_phase_factor(G, ia) * phase_k);
79 }
80
81 int offset_a = beta_chunk.desc_(beta_desc_idx::offset, i);
82 int offset_t = beta_chunk.desc_(beta_desc_idx::offset_t, i);
83 int nbeta = beta_chunk.desc_(beta_desc_idx::nbf, i);
84 for (int xi = 0; xi < nbeta; xi++) {
85 for (int igk_loc = 0; igk_loc < num_gkvec_loc; igk_loc++) {
86 pw_coeffs_a(igk_loc, offset_a + xi) =
87 pw_coeffs_t(igk_loc, offset_t + xi, j__) * static_cast<numeric_t>(phase_gk[igk_loc]);
88 }
89 }
90 }
91}
92
93// explicit instantiation
94template void
95beta_projectors_generate_cpu<double>(sddk::matrix<std::complex<double>>&,
96 sddk::mdarray<std::complex<double>, 3> const&, int, int, beta_chunk_t const&, Simulation_context const&,
97 fft::Gvec const&);
98#ifdef SIRIUS_USE_FP32
99// explicit instantiation
100template void
101beta_projectors_generate_cpu<float>(sddk::matrix<std::complex<float>>&, sddk::mdarray<std::complex<float>, 3> const&,
102 int, int, beta_chunk_t const&, Simulation_context const&, fft::Gvec const&);
103#endif
104
105template <class T>
106void
107beta_projectors_generate_gpu(beta_projectors_coeffs_t<T>& out,
108 sddk::mdarray<std::complex<T>, 3> const& pw_coeffs_t_device,
109 sddk::mdarray<std::complex<T>, 3> const& pw_coeffs_t_host, Simulation_context const& ctx,
110 fft::Gvec const& gkvec, sddk::mdarray<double, 2> const& gkvec_coord_,
111 beta_chunk_t const& beta_chunk, int j__)
112{
113 PROFILE("beta_projectors_generate_gpu");
114#if defined(SIRIUS_GPU)
115 int num_gkvec_loc = gkvec.count();
116 auto& desc = beta_chunk.desc_;
117 create_beta_gk_gpu(beta_chunk.num_atoms_, num_gkvec_loc, desc.at(sddk::memory_t::device),
118 pw_coeffs_t_device.at(sddk::memory_t::device, 0, 0, j__), gkvec_coord_.at(sddk::memory_t::device),
119 beta_chunk.atom_pos_.at(sddk::memory_t::device), out.pw_coeffs_a_.at(sddk::memory_t::device));
120#endif
121}
122
123// explicit instantiation
124template void
125beta_projectors_generate_gpu<double>(beta_projectors_coeffs_t<double>&, sddk::mdarray<std::complex<double>, 3> const&,
126 sddk::mdarray<std::complex<double>, 3> const&, Simulation_context const&, fft::Gvec const&,
127 sddk::mdarray<double, 2> const&, beta_chunk_t const&, int);
128#ifdef SIRIUS_USE_FP32
129// explicit instantiation
130template void
131beta_projectors_generate_gpu<float>(beta_projectors_coeffs_t<float>&, sddk::mdarray<std::complex<float>, 3> const&,
132 sddk::mdarray<std::complex<float>, 3> const&, Simulation_context const&, fft::Gvec const&,
133 sddk::mdarray<double, 2> const&, beta_chunk_t const&, int);
134#endif
135} // namespace local
136
137template <typename T>
138void
140{
141 auto& uc = ctx_.unit_cell();
142
143 std::vector<int> offset_t(uc.num_atom_types());
144 std::generate(offset_t.begin(), offset_t.end(),
145 [n = 0, iat = 0, &uc] () mutable
146 {
147 int offs = n;
148 n += uc.atom_type(iat++).mt_basis_size();
149 return offs;
150 });
151
152 if (uc.max_mt_basis_size() == 0) {
153 /* no beta projectors at all */
154 beta_chunks_ = std::vector<beta_chunk_t>(0);
155 num_beta_t_ = 0;
156 max_num_beta_ = 0;
157 return;
158 }
159
160 /* initial chunk size */
161 int chunk_size = std::min(uc.num_atoms(), ctx_.cfg().control().beta_chunk_size());
162 /* maximum number of chunks */
163 int num_chunks = uc.num_atoms() / chunk_size + std::min(1, uc.num_atoms() % chunk_size);
164 /* final maximum chunk size */
165 chunk_size = uc.num_atoms() / num_chunks + std::min(1, uc.num_atoms() % num_chunks);
166
167 int offset_in_beta_gk{0};
168 beta_chunks_ = std::vector<beta_chunk_t>(num_chunks);
169
170 for (int ib = 0; ib < num_chunks; ib++) {
171 /* number of atoms in this chunk */
172 int na = std::min(uc.num_atoms(), (ib + 1) * chunk_size) - ib * chunk_size;
173 beta_chunks_[ib].num_atoms_ = na;
174 beta_chunks_[ib].desc_ = sddk::mdarray<int, 2>(4, na);
175 beta_chunks_[ib].atom_pos_ = sddk::mdarray<double, 2>(3, na);
176
177 int num_beta{0};
178 for (int i = 0; i < na; i++) {
179 /* global index of atom by local index and chunk */
180 int ia = ib * chunk_size + i;
181 auto pos = uc.atom(ia).position();
182 auto& type = uc.atom(ia).type();
183 /* atom fractional coordinates */
184 for (int x : {0, 1, 2}) {
185 beta_chunks_[ib].atom_pos_(x, i) = pos[x];
186 }
187 /* number of beta functions for atom */
188 beta_chunks_[ib].desc_(beta_desc_idx::nbf, i) = type.mt_basis_size();
189 /* offset in beta_gk*/
190 beta_chunks_[ib].desc_(beta_desc_idx::offset, i) = num_beta;
191 /* offset in beta_gk_t */
192 beta_chunks_[ib].desc_(beta_desc_idx::offset_t, i) = offset_t[type.id()]; //offset_lo();
193 /* global index of atom */
194 beta_chunks_[ib].desc_(beta_desc_idx::ia, i) = ia;
195
196 num_beta += type.mt_basis_size();
197 }
198 /* number of beta-projectors in this chunk */
199 beta_chunks_[ib].num_beta_ = num_beta;
200 beta_chunks_[ib].offset_ = offset_in_beta_gk;
201 offset_in_beta_gk += num_beta;
202
203 if (ctx_.processing_unit() == sddk::device_t::GPU) {
204 beta_chunks_[ib].desc_.allocate(sddk::memory_t::device).copy_to(sddk::memory_t::device);
205 beta_chunks_[ib].atom_pos_.allocate(sddk::memory_t::device).copy_to(sddk::memory_t::device);
206 }
207 }
208 num_total_beta_ = offset_in_beta_gk;
209
210 max_num_beta_ = 0;
211 for (auto& e : beta_chunks_) {
212 max_num_beta_ = std::max(max_num_beta_, e.num_beta_);
213 }
214
215 num_beta_t_ = 0;
216 for (int iat = 0; iat < uc.num_atom_types(); iat++) {
217 num_beta_t_ += uc.atom_type(iat).mt_basis_size();
218 }
219}
220
221template <typename T>
223 : ctx_(ctx__)
224 , gkvec_(gkvec__)
225 , N_(N__)
226{
228
229 if (!num_beta_t()) {
230 return;
231 }
232
233 /* allocate memory */
234 pw_coeffs_t_ = sddk::mdarray<std::complex<T>, 3>(num_gkvec_loc(), num_beta_t(), N__, sddk::memory_t::host,
235 "pw_coeffs_t_");
236
237 if (ctx_.processing_unit() == sddk::device_t::GPU) {
238 gkvec_coord_ = sddk::mdarray<double, 2>(3, num_gkvec_loc());
239 gkvec_coord_.allocate(sddk::memory_t::device);
240 /* copy G+k vectors */
241 for (int igk_loc = 0; igk_loc < num_gkvec_loc(); igk_loc++) {
242 auto vgk = gkvec_.template gkvec<index_domain_t::local>(igk_loc);
243 for (auto x : {0, 1, 2}) {
244 gkvec_coord_(x, igk_loc) = vgk[x];
245 }
246 }
247 gkvec_coord_.copy_to(sddk::memory_t::device);
248 }
249}
250
251template <class T>
252void
253Beta_projector_generator<T>::generate(beta_projectors_coeffs_t<T>& out, int ichunk__) const
254{
255 PROFILE("sirius::Beta_projector_generator");
256 using numeric_t = std::complex<T>;
257
258 int j{0};
259 out.beta_chunk_ = beta_chunks_.at(ichunk__);
260
261 auto num_beta = out.beta_chunk_.num_beta_;
262 auto gk_size = gkvec_.count();
263
264 switch (processing_unit_) {
265 case sddk::device_t::CPU: {
266 out.pw_coeffs_a_ =
267 sddk::matrix<numeric_t>(const_cast<numeric_t*>(&beta_pw_all_atoms_(0, beta_chunks_[ichunk__].offset_)),
268 gk_size, beta_chunks_[ichunk__].num_beta_);
269 break;
270 }
271 case sddk::device_t::GPU: {
272 out.pw_coeffs_a_ =
273 sddk::matrix<numeric_t>(nullptr, out.pw_coeffs_a_buffer_.device_data(), gk_size, num_beta);
274 local::beta_projectors_generate_gpu(out, pw_coeffs_t_device_, pw_coeffs_t_host_, ctx_, gkvec_, gkvec_coord_,
275 beta_chunks_[ichunk__], j);
276 break;
277 }
278 }
279}
280
281template <class T>
282void
283Beta_projector_generator<T>::generate(beta_projectors_coeffs_t<T>& out, int ichunk__, int j__) const
284{
285 PROFILE("sirius::Beta_projector_generator");
286 using numeric_t = std::complex<T>;
287
288 out.beta_chunk_ = beta_chunks_.at(ichunk__);
289
290 auto num_beta = out.beta_chunk_.num_beta_;
291 auto gk_size = gkvec_.count();
292
293 switch (processing_unit_) {
294 case sddk::device_t::CPU: {
295 // allocate pw_coeffs_a
296 out.pw_coeffs_a_ = sddk::matrix<numeric_t>(gk_size, num_beta, sddk::get_memory_pool(sddk::memory_t::host));
297 local::beta_projectors_generate_cpu(out.pw_coeffs_a_, pw_coeffs_t_host_, ichunk__, j__,
298 beta_chunks_[ichunk__], ctx_, gkvec_);
299 break;
300 }
301 case sddk::device_t::GPU: {
302 // view of internal buffer with correct number of cols (= num_beta)
303 out.pw_coeffs_a_ =
304 sddk::matrix<numeric_t>(nullptr, out.pw_coeffs_a_buffer_.device_data(), gk_size, num_beta);
305 // g0 coefficients reside in host memory
306
307 local::beta_projectors_generate_gpu(out, pw_coeffs_t_device_, pw_coeffs_t_host_, ctx_, gkvec_, gkvec_coord_,
308 beta_chunks_[ichunk__], j__);
309 break;
310 }
311 }
312}
313
314template class Beta_projector_generator<double>;
315template class Beta_projectors_base<double>;
316#ifdef SIRIUS_USE_FP32
317template class Beta_projector_generator<float>;
319#endif
320
321} // namespace sirius
Contains declaration and implementation of sirius::Beta_projectors_base class.
Base class for beta-projectors, gradient of beta-projectors and strain derivatives of beta-projectors...
fft::Gvec const & gkvec_
List of G+k vectors.
sddk::mdarray< double, 2 > gkvec_coord_
Coordinates of G+k vectors used by GPU kernel.
sddk::mdarray< std::complex< T >, 3 > pw_coeffs_t_
Phase-factor independent coefficients of |beta> functions for atom types.
void split_in_chunks()
Split beta-projectors into chunks.
Simulation context is a set of parameters and objects describing a single simulation.
auto gvec_phase_factor(r3::vector< int > G__, int ia__) const
Phase factors .
A set of G-vectors for FFTs and G+k basis functions.
Definition: gvec.hpp:130
int count() const
Number of G-vectors for a fine-grained distribution for the current MPI rank.
Definition: gvec.hpp:506
r3::vector< int > gvec(int ig__) const
Return G vector in fractional coordinates.
Definition: gvec.hpp:540
void copy_to(memory_t mem__, size_t idx0__, size_t n__, acc::stream_id sid=acc::stream_id(-1))
Copy n elements starting from idx0 from one memory type to another.
Definition: memory.hpp:1339
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
Definition: memory.hpp:1057
Get the environment variables.
Basic interface to linear algebra functions.
Namespace of the SIRIUS library.
Definition: sirius.f90:5
const double twopi
Definition: constants.hpp:45
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
Definition: math_tools.hpp:165
A time-based profiler.
Describe chunk of beta-projectors for a block of atoms.
sddk::mdarray< int, 2 > desc_
Descriptor of block of beta-projectors for an atom.
int num_atoms_
Number of atoms in the current chunk.
sddk::mdarray< double, 2 > atom_pos_
Positions of atoms.
static const int ia
Global index of atom.
static const int nbf
Number of beta-projector functions for this atom.
static const int offset
Offset of beta-projectors in this chunk.
static const int offset_t
Offset of beta-projectors in the array for atom types.
Stores a chunk of the beta-projector and metadata.
Contains declaration and implementation of Wave_functions class.