25#ifndef __GENERATE_SUBSPACE_MATRIX_HPP__
26#define __GENERATE_SUBSPACE_MATRIX_HPP__
50template <
typename T,
typename F>
55 PROFILE(
"sirius::generate_subspace_matrix");
58 if (mtrx_old__ && mtrx_old__->size()) {
59 RTE_ASSERT(&mtrx__.blacs_grid() == &mtrx_old__->blacs_grid());
65 n_blocks(mtrx__.blacs_grid().num_ranks_row()),
block_id(mtrx__.blacs_grid().rank_row()),
68 n_blocks(mtrx__.blacs_grid().num_ranks_col()),
block_id(mtrx__.blacs_grid().rank_col()),
73 #pragma omp parallel for schedule(static)
74 for (
int i = 0; i < spl_col.
local_size(); i++) {
75 std::copy(&(*mtrx_old__)(0, i), &(*mtrx_old__)(0, i) + spl_row.
local_size(), &mtrx__(0, i));
80 if (env::print_checksum()) {
81 auto cs = mtrx__.checksum(N__ - num_locked__, N__ - num_locked__);
83 print_checksum(
"subspace_mtrx_old", cs, RTE_OUT(std::cout));
91 auto mem = ctx__.processing_unit() == sddk::device_t::CPU ? sddk::memory_t::host : sddk::memory_t::device;
95 mtrx__, 0, N__ - num_locked__);
99 if (mtrx__.blacs_grid().comm().size() == 1) {
100 #pragma omp parallel for
101 for (
int i = 0; i < N__ - num_locked__; i++) {
102 for (
int j = N__ - num_locked__; j < N__ + n__ - num_locked__; j++) {
103 mtrx__(j, i) =
conj(mtrx__(i, j));
108 .
tranc(n__, N__ - num_locked__, mtrx__, 0, N__ - num_locked__, mtrx__, N__ - num_locked__, 0);
112 if (env::print_checksum()) {
114 n_blocks(mtrx__.blacs_grid().num_ranks_row()),
block_id(mtrx__.blacs_grid().rank_row()),
117 n_blocks(mtrx__.blacs_grid().num_ranks_col()),
block_id(mtrx__.blacs_grid().rank_col()),
119 auto cs = mtrx__.checksum(N__ + n__ - num_locked__, N__ + n__ - num_locked__);
121 print_checksum(
"subspace_mtrx", cs, RTE_OUT(std::cout));
126 mtrx__.make_real_diag(N__ + n__ - num_locked__);
131 n_blocks(mtrx__.blacs_grid().num_ranks_row()),
block_id(mtrx__.blacs_grid().rank_row()),
134 n_blocks(mtrx__.blacs_grid().num_ranks_col()),
block_id(mtrx__.blacs_grid().rank_col()),
138 #pragma omp parallel for schedule(static)
139 for (
int i = 0; i < spl_col.
local_size(); i++) {
Simulation context is a set of parameters and objects describing a single simulation.
auto const & comm_band() const
Band parallelization communicator.
int num_mag_dims() const
Number of dimensions in the magnetization vector.
int bs_row() const
Row blocking factor.
int bs_col() const
Column blocking factor.
void tranc(ftn_int m, ftn_int n, dmatrix< T > &A, ftn_int ia, ftn_int ja, dmatrix< T > &C, ftn_int ic, ftn_int jc) const
Conjugate transpose matrix.
value_type local_size(block_id block_id__) const
Return local size of the split index for a given block.
Wave-functions representation.
Describe a range of bands.
Describe a range of spins.
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
scalapack
CPU ScaLAPACK.
std::enable_if_t< std::is_same< T, real_type< F > >::value, void > inner(::spla::Context &spla_ctx__, sddk::memory_t mem__, spin_range spins__, W const &wf_i__, band_range br_i__, Wave_functions< T > const &wf_j__, band_range br_j__, la::dmatrix< F > &result__, int irow0__, int jcol0__)
Compute inner product between the two sets of wave-functions.
Namespace of the SIRIUS library.
strong_type< int, struct __n_blocks_tag > n_blocks
Number of blocks to which the global index is split.
auto conj(double x__)
Return complex conjugate of a number. For a real value this is the number itself.
void generate_subspace_matrix(Simulation_context &ctx__, int N__, int n__, int num_locked__, wf::Wave_functions< T > &phi__, wf::Wave_functions< T > &op_phi__, la::dmatrix< F > &mtrx__, la::dmatrix< F > *mtrx_old__=nullptr)
Generate subspace matrix for the iterative diagonalization.
Contains definition and implementation of Simulation_context class.
Contains declaration and implementation of Wave_functions class.