3#if defined(SIRIUS_ELPA)
7#if defined(SIRIUS_DLAF)
8#include <dlaf_c/init.h>
15#if defined(SIRIUS_ELPA)
18void setup_handler(elpa_t& handle__,
int stage__, M
const& m__,
int na__,
int nev__)
21 int nt = omp_get_max_threads();
23 elpa_set_integer(handle__,
"na", na__, &error);
24 elpa_set_integer(handle__,
"nev", nev__, &error);
25 elpa_set_integer(handle__,
"local_nrows", m__.num_rows_local(), &error);
26 elpa_set_integer(handle__,
"local_ncols", m__.num_cols_local(), &error);
27 elpa_set_integer(handle__,
"nblk", m__.bs_row(), &error);
28 elpa_set_integer(handle__,
"mpi_comm_parent", MPI_Comm_c2f(m__.blacs_grid().comm().native()), &error);
29 elpa_set_integer(handle__,
"process_row", m__.blacs_grid().comm_row().rank(), &error);
30 elpa_set_integer(handle__,
"process_col", m__.blacs_grid().comm_col().rank(), &error);
31 elpa_set_integer(handle__,
"blacs_context", m__.blacs_grid().context(), &error);
32 elpa_set_integer(handle__,
"omp_threads", nt, &error);
34 elpa_set_integer(handle__,
"nvidia-gpu", 1, &error);
37 elpa_set_integer(handle__,
"solver", ELPA_SOLVER_1STAGE, &error);
39 elpa_set_integer(handle__,
"solver", ELPA_SOLVER_2STAGE, &error);
44Eigensolver_elpa::Eigensolver_elpa(
int stage__)
48 if (!(stage_ == 1 || stage_ == 2)) {
49 RTE_THROW(
"wrong type of ELPA solver");
55 if (elpa_init(20170403) != ELPA_OK) {
56 RTE_THROW(
"ELPA API version not supported");
60void Eigensolver_elpa::finalize()
70 PROFILE(
"Eigensolver_elpa|solve_gen");
72 int nt = omp_get_max_threads();
75 RTE_THROW(
"number of columns in A and Z don't match");
78 PROFILE_START(
"Eigensolver_elpa|solve_gen|setup");
83 handle = elpa_allocate(&error);
84 if (error != ELPA_OK) {
87 setup_handler(handle, stage_, A__, matrix_size__, nev__);
89 PROFILE_STOP(
"Eigensolver_elpa|solve_gen|setup");
91 auto& mph = get_memory_pool(sddk::memory_t::host);
93 auto w = mph.get_unique_ptr<
double>(matrix_size__);
95 elpa_generalized_eigenvectors_d(handle, A__.at(sddk::memory_t::host), B__.at(sddk::memory_t::host),
96 w.get(), Z__.at(sddk::memory_t::host), 0, &error);
98 if (error != ELPA_OK) {
99 elpa_deallocate(handle, &error);
103 elpa_deallocate(handle, &error);
105 std::copy(w.get(), w.get() + nev__, eval__);
107 if (nt != omp_get_max_threads()) {
109 s <<
"number of OMP threads was changed by elpa" << std::endl
110 <<
" initial number of threads : " << nt << std::endl
111 <<
" new number of threads : " << omp_get_max_threads();
130int Eigensolver_elpa::solve(ftn_int matrix_size__, ftn_int nev__,
la::dmatrix<std::complex<double>>& A__,
la::dmatrix<std::complex<double>>& B__,
131 double* eval__,
la::dmatrix<std::complex<double>>& Z__)
133 PROFILE(
"Eigensolver_elpa|solve_gen");
135 int nt = omp_get_max_threads();
137 if (A__.num_cols_local() != Z__.num_cols_local()) {
138 RTE_THROW(
"number of columns in A and Z don't match");
141 PROFILE_START(
"Eigensolver_elpa|solve_gen|setup");
146 handle = elpa_allocate(&error);
147 if (error != ELPA_OK) {
150 setup_handler(handle, stage_, A__, matrix_size__, nev__);
152 PROFILE_STOP(
"Eigensolver_elpa|solve_gen|setup");
154 auto& mph = get_memory_pool(sddk::memory_t::host);
156 auto w = mph.get_unique_ptr<
double>(matrix_size__);
158 elpa_generalized_eigenvectors_dc(handle, A__.at(sddk::memory_t::host), B__.at(sddk::memory_t::host),
159 w.get(), Z__.at(sddk::memory_t::host), 0, &error);
161 if (error != ELPA_OK) {
162 elpa_deallocate(handle, &error);
166 elpa_deallocate(handle, &error);
168 std::copy(w.get(), w.get() + nev__, eval__);
170 if (nt != omp_get_max_threads()) {
172 s <<
"number of OMP threads was changed by elpa" << std::endl
173 <<
" initial number of threads : " << nt << std::endl
174 <<
" new number of threads : " << omp_get_max_threads();
194 return solve(matrix_size__, matrix_size__, A__, B__, eval__, Z__);
198int Eigensolver_elpa::solve(ftn_int matrix_size__,
la::dmatrix<std::complex<double>>& A__,
la::dmatrix<std::complex<double>>& B__,
double* eval__,
201 return solve(matrix_size__, matrix_size__, A__, B__, eval__, Z__);
207 PROFILE(
"Eigensolver_elpa|solve_std");
209 int nt = omp_get_max_threads();
212 RTE_THROW(
"number of columns in A and Z don't match");
215 PROFILE_START(
"Eigensolver_elpa|solve_std|setup");
220 handle = elpa_allocate(&error);
221 if (error != ELPA_OK) {
224 setup_handler(handle, stage_, A__, matrix_size__, nev__);
226 PROFILE_STOP(
"Eigensolver_elpa|solve_std|setup");
228 auto& mph = get_memory_pool(sddk::memory_t::host);
229 auto w = mph.get_unique_ptr<
double>(matrix_size__);
231 elpa_eigenvectors_a_h_a_d(handle, A__.at(sddk::memory_t::host), w.get(), Z__.at(sddk::memory_t::host), &error);
233 elpa_deallocate(handle, &error);
235 std::copy(w.get(), w.get() + nev__, eval__);
236 if (nt != omp_get_max_threads()) {
238 s <<
"number of OMP threads was changed by elpa" << std::endl
239 <<
" initial number of threads : " << nt << std::endl
240 <<
" new number of threads : " << omp_get_max_threads();
247int Eigensolver_elpa::solve(ftn_int matrix_size__, ftn_int nev__,
la::dmatrix<std::complex<double>>& A__,
double* eval__,
250 PROFILE(
"Eigensolver_elpa|solve_std");
252 int nt = omp_get_max_threads();
254 if (A__.num_cols_local() != Z__.num_cols_local()) {
255 RTE_THROW(
"number of columns in A and Z don't match");
258 PROFILE_START(
"Eigensolver_elpa|solve_std|setup");
263 handle = elpa_allocate(&error);
264 if (error != ELPA_OK) {
267 setup_handler(handle, stage_, A__, matrix_size__, nev__);
269 PROFILE_STOP(
"Eigensolver_elpa|solve_std|setup");
271 auto& mph = get_memory_pool(sddk::memory_t::host);
272 auto w = mph.get_unique_ptr<
double>(matrix_size__);
274 auto A_ptr = A__.size_local() ? A__.at(sddk::memory_t::host) :
nullptr;
275 auto Z_ptr = Z__.size_local() ? Z__.at(sddk::memory_t::host) :
nullptr;
276 elpa_eigenvectors_a_h_a_dc(handle, A_ptr, w.get(), Z_ptr, &error);
278 elpa_deallocate(handle, &error);
280 std::copy(w.get(), w.get() + nev__, eval__);
282 if (nt != omp_get_max_threads()) {
284 s <<
"number of OMP threads was changed by elpa" << std::endl
285 <<
" initial number of threads : " << nt << std::endl
286 <<
" new number of threads : " << omp_get_max_threads();
296 return solve(matrix_size__, matrix_size__, A__, eval__, Z__);
300int Eigensolver_elpa::solve(ftn_int matrix_size__,
la::dmatrix<std::complex<double>>& A__,
double* eval__,
la::dmatrix<std::complex<double>>& Z__)
302 return solve(matrix_size__, matrix_size__, A__, eval__, Z__);
307#if defined(SIRIUS_DLAF)
311 const char* pika_argv[] = {
"sirius",
"--pika:print-bind"};
312 const char* dlaf_argv[] = {
"sirius"};
313 dlaf_initialize(2, pika_argv, 1, dlaf_argv);
316void Eigensolver_dlaf::finalize()
int num_cols_local() const
Local number of columns.
Contains definition and implementation of various eigenvalue solver interfaces.
memory_t
Memory types where the code can store data.
@ error
throw a parse_error exception in case of a tag
int num_devices()
Get the number of devices.
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
ev_solver_t
Type of eigen-value solver.
Namespace of the SIRIUS library.
void initialize(bool call_mpi_init__=true)
Initialize the library.