26#include "lapw/generate_alm_block.hpp"
33 PROFILE(
"sirius::K_point::generate_fv_states");
35 if (!ctx_.full_potential()) {
39 auto const& uc = ctx_.unit_cell();
41 auto pcs = env::print_checksum();
43 auto bs = ctx_.cyclic_block_size();
45 ctx_.blacs_grid(), bs, bs);
54 for (
int i = 0; i < na; i++) {
55 int ia = atom_begin + i;
56 auto& type = uc.atom(ia).type();
57 num_mt_aw += type.mt_aw_basis_size();
61 auto alm = generate_alm_block<false, T>(ctx_, atom_begin, na, this->alm_coeffs_loc());
62 auto cs = alm.checksum();
64 print_checksum(
"alm", cs, RTE_OUT(this->out(0)));
68 spla::pgemm_ssb(num_mt_aw, ctx_.num_fv_states(), this->gkvec().count(), SPLA_OP_TRANSPOSE, 1.0,
69 alm.at(sddk::memory_t::host), alm.ld(),
71 fv_eigen_vectors_slab().ld(),
72 0.0, alm_fv.at(sddk::memory_t::host), alm_fv.
ld(), mt_aw_offset, 0, alm_fv.spla_distribution(),
76 mt_aw_offset += num_mt_aw;
79 std::vector<int> num_mt_apw_coeffs(uc.num_atoms());
80 for (
int ia = 0; ia < uc.num_atoms(); ia++) {
81 num_mt_apw_coeffs[ia] = uc.atom(ia).mt_aw_basis_size();
89 auto layout_in = alm_fv.grid_layout(0, 0, uc.mt_aw_basis_size(), ctx_.num_fv_states());
93 #pragma omp parallel for
94 for (
int i = 0; i < ctx_.num_fv_states(); i++) {
98 std::copy(in_ptr, in_ptr + gkvec().count(), out_ptr);
101 int num_mt_aw = uc.atom(it.i).type().mt_aw_basis_size();
103 for (
int xi = 0; xi < num_mt_aw; xi++) {
108 for (
int xi = 0; xi < uc.atom(it.i).type().mt_lo_basis_size(); xi++) {
117 print_checksum(
"fv_states_pw", z1, RTE_OUT(this->out(0)));
118 print_checksum(
"fv_states_mt", z2, RTE_OUT(this->out(0)));
124#ifdef SIRIUS_USE_FP32
void generate_fv_states()
Generate first-variational states from eigen-vectors.
uint32_t ld() const
Return leading dimension size.
Wave-functions for the muffin-tin part of LAPW.
auto const & spl_num_atoms() const
Return a split index for the number of atoms.
auto & mt_coeffs(int xi__, atom_index_t::local ia__, spin_index ispn__, band_index i__)
Return reference to the coefficient by atomic orbital index, atom, spin and band indices.
auto grid_layout_mt(spin_index ispn__, band_range b__)
Return COSTA layout for the muffin-tin part for a given spin index and band range.
Describe a range of bands.
Contains definition of sirius::K_point class.
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
void zero(T *ptr__, size_t n__)
Zero the device memory.
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > transform(::spla::Context &spla_ctx__, sddk::memory_t mem__, la::dmatrix< F > const &M__, int irow0__, int jcol0__, real_type< F > alpha__, Wave_functions< T > const &wf_in__, spin_index s_in__, band_range br_in__, real_type< F > beta__, Wave_functions< T > &wf_out__, spin_index s_out__, band_range br_out__)
Apply linear transformation to the wave-functions.
Namespace of the SIRIUS library.
auto split_in_blocks(int length__, int block_size__)
Split the 'length' elements into blocks with the initial block size.