SIRIUS 7.5.0
Electronic structure library and applications
generate_alm_block.hpp
1#ifndef __GENERATE_ALM_BLOCK_HPP__
2#define __GENERATE_ALM_BLOCK_HPP__
3
6
7namespace sirius {
8
9/// Generate matching coefficients for a block of atoms.
10template <bool conjugate, typename T>
11auto generate_alm_block(Simulation_context const& ctx__, int atom_begin__, int num_atoms__,
12 Matching_coefficients const& alm__)
13{
14 PROFILE("sirius::generate_alm_block");
15
16 int num_mt_aw{0};
17 std::vector<int> mt_aw_offset(num_atoms__);
18 for (int ia = 0; ia < num_atoms__; ia++) {
19 mt_aw_offset[ia] = num_mt_aw;
20 num_mt_aw += ctx__.unit_cell().atom(atom_begin__ + ia).mt_aw_basis_size();
21 }
22
24 switch (ctx__.processing_unit()) {
25 case sddk::device_t::CPU: {
26 result = sddk::mdarray<std::complex<T>, 2>(alm__.gkvec().count(), num_mt_aw,
27 get_memory_pool(sddk::memory_t::host), "alm_block");
28 break;
29 }
30 case sddk::device_t::GPU: {
31 result = sddk::mdarray<std::complex<T>, 2>(alm__.gkvec().count(), num_mt_aw,
32 get_memory_pool(sddk::memory_t::host_pinned), "alm_block");
33 result.allocate(get_memory_pool(sddk::memory_t::device));
34 break;
35 }
36 }
37
38 #pragma omp parallel
39 {
40 int tid = omp_get_thread_num();
41 #pragma omp for
42 for (int i = 0; i < num_atoms__; i++) {
43 auto& atom = ctx__.unit_cell().atom(atom_begin__ + i);
44 auto& type = atom.type();
45 /* wrap matching coefficients of a single atom */
47 switch (ctx__.processing_unit()) {
48 case sddk::device_t::CPU: {
49 alm_atom = sddk::mdarray<std::complex<T>, 2>(result.at(sddk::memory_t::host, 0, mt_aw_offset[i]),
50 alm__.gkvec().count(), type.mt_aw_basis_size(), "alm_atom");
51 break;
52 }
53 case sddk::device_t::GPU: {
54 alm_atom = sddk::mdarray<std::complex<T>, 2>(result.at(sddk::memory_t::host, 0, mt_aw_offset[i]),
55 result.at(sddk::memory_t::device, 0, mt_aw_offset[i]),
56 alm__.gkvec().count(), type.mt_aw_basis_size(), "alm_atom");
57 break;
58 }
59 }
60 /* generate LAPW matching coefficients on the CPU */
61 alm__.template generate<conjugate>(atom, alm_atom);
62 if (ctx__.processing_unit() == sddk::device_t::GPU) {
63 alm_atom.copy_to(sddk::memory_t::device, acc::stream_id(tid));
64 }
65
66 }
67 if (ctx__.processing_unit() == sddk::device_t::GPU) {
69 }
70 }
71 return result;
72}
73
74}
75
76#endif
Simulation context is a set of parameters and objects describing a single simulation.
Helper class to wrap stream id (integer number).
Definition: acc.hpp:132
Multidimensional array with the column-major (Fortran) order.
Definition: memory.hpp:660
void copy_to(memory_t mem__, size_t idx0__, size_t n__, acc::stream_id sid=acc::stream_id(-1))
Copy n elements starting from idx0 from one memory type to another.
Definition: memory.hpp:1339
mdarray< T, N > & allocate(memory_t memory__)
Allocate memory for array.
Definition: memory.hpp:1057
Contains definition and partial implementation of sirius::Matching_coefficients class.
void sync_stream(stream_id sid__)
Synchronize a single stream.
Definition: acc.hpp:234
Namespace of the SIRIUS library.
Definition: sirius.f90:5
auto generate_alm_block(Simulation_context const &ctx__, int atom_begin__, int num_atoms__, Matching_coefficients const &alm__)
Generate matching coefficients for a block of atoms.
Contains definition and implementation of Simulation_context class.