SIRIUS 7.5.0
Electronic structure library and applications
eigenproblem.hpp
Go to the documentation of this file.
1// Copyright (c) 2013-2019 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file eigenproblem.hpp
21 *
22 * \brief Contains definition and implementation of various eigenvalue solver interfaces.
23 */
24
25#ifndef __EIGENPROBLEM_HPP__
26#define __EIGENPROBLEM_HPP__
27
28#include "core/profiler.hpp"
29#include "core/rte/rte.hpp"
30#include "core/omp.hpp"
31#include "linalg.hpp"
32#include "eigensolver.hpp"
33#if defined(SIRIUS_GPU) && defined(SIRIUS_MAGMA)
34#include "core/acc/magma.hpp"
35#endif
36
37#if defined(SIRIUS_GPU) && defined(SIRIUS_CUDA)
38#include "core/acc/cusolver.hpp"
39#endif
40
41#if defined(SIRIUS_DLAF)
42#include <dlaf_c/eigensolver/eigensolver.h>
43#include <dlaf_c/eigensolver/gen_eigensolver.h>
44#endif
45
46namespace sirius {
47
48namespace la {
49
51{
52 public:
54 : Eigensolver(ev_solver_t::lapack, false, sddk::memory_t::host, sddk::memory_t::host)
55 {
56 }
57
58 /// wrapper for solving a standard eigen-value problem for all eigen-pairs.
59 int solve(ftn_int matrix_size__, dmatrix<double>& A__, double* eval__, dmatrix<double>& Z__) override {
60 PROFILE("Eigensolver_lapack|dsyevd");
61 return solve_(matrix_size__, A__, eval__, Z__);
62 }
63
64 int solve(ftn_int matrix_size__, dmatrix<std::complex<double>>& A__, double* eval__, dmatrix<std::complex<double>>& Z__) override {
65 PROFILE("Eigensolver_lapack|zheevd");
66 return solve_(matrix_size__, A__, eval__, Z__);
67 }
68
69 int solve(ftn_int matrix_size__, dmatrix<float>& A__, float* eval__, dmatrix<float>& Z__) override {
70 PROFILE("Eigensolver_lapack|ssyevd");
71 return solve_(matrix_size__, A__, eval__, Z__);
72 }
73
74 int solve(ftn_int matrix_size__, dmatrix<std::complex<float>>& A__, float* eval__, dmatrix<std::complex<float>>& Z__) override {
75 PROFILE("Eigensolver_lapack|cheevd");
76 return solve_(matrix_size__, A__, eval__, Z__);
77 }
78
79 /// Solve a standard eigen-value problem for all eigen-pairs.
80 template <typename T>
81 int solve_(ftn_int matrix_size__, dmatrix<T>& A__, real_type<T>* eval__, dmatrix<T>& Z__)
82 {
83 ftn_int info;
84 ftn_int lda = A__.ld();
85
86 ftn_int lwork;
87 ftn_int liwork = 3 + 5 * matrix_size__;
88 ftn_int lrwork = 1 + 5 * matrix_size__ + 2 * matrix_size__ * matrix_size__; // only required in complex
89
90 if (std::is_scalar<T>::value) {
91 lwork = 1 + 6 * matrix_size__ + 2 * matrix_size__ * matrix_size__;
92 } else {
93 lwork = 2 * matrix_size__ + matrix_size__ * matrix_size__;
94 }
95
96 auto& mph = get_memory_pool(sddk::memory_t::host);
97
98 auto work = mph.get_unique_ptr<T>(lwork);
99 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
100 auto rwork = mph.get_unique_ptr<real_type<T>>(lrwork); // only required in complex
101
102 if (std::is_same<T, double>::value) {
103 FORTRAN(dsyevd)
104 ("V", "U", &matrix_size__, reinterpret_cast<double*>(A__.at(sddk::memory_t::host)), &lda,
105 reinterpret_cast<double*>(eval__), reinterpret_cast<double*>(work.get()), &lwork, iwork.get(), &liwork,
106 &info, (ftn_int)1, (ftn_int)1);
107 } else if (std::is_same<T, std::complex<double>>::value) {
108 FORTRAN(zheevd)
109 ("V", "U", &matrix_size__, reinterpret_cast<std::complex<double>*>(A__.at(sddk::memory_t::host)), &lda,
110 reinterpret_cast<double*>(eval__), reinterpret_cast<std::complex<double>*>(work.get()), &lwork,
111 reinterpret_cast<double*>(rwork.get()), &lrwork, iwork.get(), &liwork, &info, (ftn_int)1, (ftn_int)1);
112 } else if (std::is_same<T, float>::value) {
113 FORTRAN(ssyevd)
114 ("V", "U", &matrix_size__, reinterpret_cast<float*>(A__.at(sddk::memory_t::host)), &lda,
115 reinterpret_cast<float*>(eval__), reinterpret_cast<float*>(work.get()), &lwork, iwork.get(), &liwork,
116 &info, (ftn_int)1, (ftn_int)1);
117 } else if (std::is_same<T, std::complex<float>>::value) {
118 FORTRAN(cheevd)
119 ("V", "U", &matrix_size__, reinterpret_cast<std::complex<float>*>(A__.at(sddk::memory_t::host)), &lda,
120 reinterpret_cast<float*>(eval__), reinterpret_cast<std::complex<float>*>(work.get()), &lwork,
121 reinterpret_cast<float*>(rwork.get()), &lrwork, iwork.get(), &liwork, &info, (ftn_int)1, (ftn_int)1);
122 }
123
124 if (!info) {
125 for (int i = 0; i < matrix_size__; i++) {
126 std::copy(A__.at(sddk::memory_t::host, 0, i), A__.at(sddk::memory_t::host, 0, i) + matrix_size__,
127 Z__.at(sddk::memory_t::host, 0, i));
128 }
129 }
130 return info;
131 }
132
133 /// wrapper for solving a standard eigen-value problem for N lowest eigen-pairs.
134 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__, double* eval__, dmatrix<double>& Z__) override {
135 PROFILE("Eigensolver_lapack|dsyevr");
136 return solve_(matrix_size__, nev__, A__, eval__, Z__);
137 }
138
139 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__, double* eval__, dmatrix<std::complex<double>>& Z__) override {
140 PROFILE("Eigensolver_lapack|zheevx");
141 return solve_(matrix_size__, nev__, A__, eval__, Z__);
142 }
143
144 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<float>& A__, float* eval__, dmatrix<float>& Z__) override {
145 PROFILE("Eigensolver_lapack|ssyevr");
146 return solve_(matrix_size__, nev__, A__, eval__, Z__);
147 }
148
149 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<float>>& A__, float* eval__, dmatrix<std::complex<float>>& Z__) override {
150 PROFILE("Eigensolver_lapack|cheevx");
151 return solve_(matrix_size__, nev__, A__, eval__, Z__);
152 }
153
154 /// Solve a standard eigen-value problem for N lowest eigen-pairs.
155 template <typename T>
156 int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix<T>& A__, real_type<T>* eval__, dmatrix<T>& Z__)
157 {
158 real_type<T> vl, vu;
159
160 ftn_int il{1};
161 ftn_int m{-1};
162 ftn_int info;
163
164 auto& mph = get_memory_pool(sddk::memory_t::host);
165
166 auto w = mph.get_unique_ptr<real_type<T>>(matrix_size__);
167 auto isuppz = mph.get_unique_ptr<ftn_int>(2 * matrix_size__); // for real matrix
168 auto ifail = mph.get_unique_ptr<ftn_int>(matrix_size__); // for complex matrix
169
170 ftn_int lda = A__.ld();
171 ftn_int ldz = Z__.ld();
172
173 real_type<T> abs_tol = 2 * linalg_base::dlamch('S');
174
175 ftn_int liwork;
176 ftn_int lwork;
177 int nb;
178 ftn_int lrwork = 7 * matrix_size__; // only require in complex
179
180 liwork = 10 * matrix_size__;
181 if (std::is_same<T, double>::value) {
182 nb = std::max(linalg_base::ilaenv(1, "DSYTRD", "U", matrix_size__, -1, -1, -1),
183 linalg_base::ilaenv(1, "DORMTR", "U", matrix_size__, -1, -1, -1));
184 liwork = 10 * matrix_size__;
185 lwork = std::max((nb + 6) * matrix_size__, 26 * matrix_size__);
186 } else if (std::is_same<T, std::complex<double>>::value) {
187 nb = linalg_base::ilaenv(1, "ZHETRD", "U", matrix_size__, -1, -1, -1);
188 liwork = 5 * matrix_size__;
189 lwork = (nb + 1) * matrix_size__;
190 } else if (std::is_same<T, float>::value) {
191 nb = std::max(linalg_base::ilaenv(1, "SSYTRD", "U", matrix_size__, -1, -1, -1),
192 linalg_base::ilaenv(1, "SORMTR", "U", matrix_size__, -1, -1, -1));
193 liwork = 10 * matrix_size__;
194 lwork = std::max((nb + 6) * matrix_size__, 26 * matrix_size__);
195 } else if (std::is_same<T, std::complex<float>>::value) {
196 nb = linalg_base::ilaenv(1, "CHETRD", "U", matrix_size__, -1, -1, -1);
197 liwork = 5 * matrix_size__;
198 lwork = (nb + 1) * matrix_size__;
199 }
200
201 auto work = mph.get_unique_ptr<T>(lwork);
202 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
203 auto rwork = mph.get_unique_ptr<real_type<T>>(lrwork); // only required in complex
204
205 if (std::is_same<T, double>::value) {
206 FORTRAN(dsyevr)
207 ("V", "I", "U", &matrix_size__, reinterpret_cast<double*>(A__.at(sddk::memory_t::host)), &lda,
208 reinterpret_cast<double*>(&vl), reinterpret_cast<double*>(&vu), &il, &nev__,
209 reinterpret_cast<double*>(&abs_tol), &m, reinterpret_cast<double*>(w.get()),
210 reinterpret_cast<double*>(Z__.at(sddk::memory_t::host)), &ldz, isuppz.get(),
211 reinterpret_cast<double*>(work.get()), &lwork, iwork.get(), &liwork, &info, (ftn_int)1, (ftn_int)1,
212 (ftn_int)1);
213 lwork = std::max((nb + 6) * matrix_size__, 26 * matrix_size__);
214 } else if (std::is_same<T, std::complex<double>>::value) {
215 FORTRAN(zheevx)
216 ("V", "I", "U", &matrix_size__, reinterpret_cast<std::complex<double>*>(A__.at(sddk::memory_t::host)), &lda,
217 reinterpret_cast<double*>(&vl), reinterpret_cast<double*>(&vu), &il, &nev__,
218 reinterpret_cast<double*>(&abs_tol), &m, reinterpret_cast<double*>(w.get()),
219 reinterpret_cast<std::complex<double>*>(Z__.at(sddk::memory_t::host)), &ldz,
220 reinterpret_cast<std::complex<double>*>(work.get()), &lwork, reinterpret_cast<double*>(rwork.get()),
221 iwork.get(), ifail.get(), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
222 } else if (std::is_same<T, float>::value) {
223 FORTRAN(ssyevr)
224 ("V", "I", "U", &matrix_size__, reinterpret_cast<float*>(A__.at(sddk::memory_t::host)), &lda,
225 reinterpret_cast<float*>(&vl), reinterpret_cast<float*>(&vu), &il, &nev__,
226 reinterpret_cast<float*>(&abs_tol), &m, reinterpret_cast<float*>(w.get()),
227 reinterpret_cast<float*>(Z__.at(sddk::memory_t::host)), &ldz, isuppz.get(), reinterpret_cast<float*>(work.get()),
228 &lwork, iwork.get(), &liwork, &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
229 } else if (std::is_same<T, std::complex<float>>::value) {
230 FORTRAN(cheevx)
231 ("V", "I", "U", &matrix_size__, reinterpret_cast<std::complex<float>*>(A__.at(sddk::memory_t::host)), &lda,
232 reinterpret_cast<float*>(&vl), reinterpret_cast<float*>(&vu), &il, &nev__,
233 reinterpret_cast<float*>(&abs_tol), &m, reinterpret_cast<float*>(w.get()),
234 reinterpret_cast<std::complex<float>*>(Z__.at(sddk::memory_t::host)), &ldz,
235 reinterpret_cast<std::complex<float>*>(work.get()), &lwork, reinterpret_cast<float*>(rwork.get()),
236 iwork.get(), ifail.get(), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
237 }
238
239 if (m != nev__) {
240 std::stringstream s;
241 s << "not all eigen-values are found" << std::endl
242 << "target number of eigen-values: " << nev__ << std::endl
243 << "number of eigen-values found: " << m << std::endl
244 << "matrix_size : " << matrix_size__ << std::endl
245 << "lda : " << lda << std::endl
246 << "lda : " << lda << std::endl
247 << "nb : " << nb << std::endl
248 << "liwork : " << liwork << std::endl
249 << "lwork : " << lwork << std::endl;
250 RTE_WARNING(s);
251 return 1;
252 }
253
254 if (!info) {
255 std::copy(w.get(), w.get() + nev__, eval__);
256 }
257
258 return info;
259 }
260
261 /// wrapper for solving a generalized eigen-value problem for N lowest eigen-pairs.
262 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__, dmatrix<double>& B__, double* eval__,
263 dmatrix<double>& Z__) override {
264 PROFILE("Eigensolver_lapack|dsygvx");
265 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
266 }
267
268 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__, dmatrix<std::complex<double>>& B__, double* eval__,
269 dmatrix<std::complex<double>>& Z__) override {
270 PROFILE("Eigensolver_lapack|zhegvx");
271 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
272 }
273
274 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<float>& A__, dmatrix<float>& B__, float* eval__,
275 dmatrix<float>& Z__) override {
276 PROFILE("Eigensolver_lapack|ssygvx");
277 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
278 }
279
280 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<float>>& A__, dmatrix<std::complex<float>>& B__, float* eval__,
281 dmatrix<std::complex<float>>& Z__) override {
282 PROFILE("Eigensolver_lapack|chegvx");
283 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
284 }
285
286 /// Solve a generalized eigen-value problem for N lowest eigen-pairs.
287 template <typename T>
288 int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix<T>& A__, dmatrix<T>& B__,
289 real_type<T>* eval__, dmatrix<T>& Z__)
290 {
291 ftn_int info;
292
293 ftn_int lda = A__.ld();
294 ftn_int ldb = B__.ld();
295 ftn_int ldz = Z__.ld();
296
297 real_type<T> abs_tol = 2 * linalg_base::dlamch('S');
298 real_type<T> vl{0};
299 real_type<T> vu{0};
300
301 ftn_int ione{1};
302 ftn_int m{0};
303
304 auto& mph = get_memory_pool(sddk::memory_t::host);
305
306 auto w = mph.get_unique_ptr<real_type<T>>(matrix_size__);
307 auto ifail = mph.get_unique_ptr<ftn_int>(matrix_size__);
308
309 int nb, lwork, liwork;
310 int lrwork = 0; // only required in complex
311 if (std::is_same<T, double>::value) {
312 nb = linalg_base::ilaenv(1, "DSYTRD", "U", matrix_size__, 0, 0, 0);
313 lwork = (nb + 3) * matrix_size__ + 1024;
314 liwork = 5 * matrix_size__;
315 } else if (std::is_same<T, std::complex<double>>::value) {
316 nb = linalg_base::ilaenv(1, "ZHETRD", "U", matrix_size__, 0, 0, 0);
317 lwork = (nb + 1) * matrix_size__;
318 lrwork = 7 * matrix_size__;
319 liwork = 5 * matrix_size__;
320 } else if (std::is_same<T, float>::value) {
321 nb = linalg_base::ilaenv(1, "SSYTRD", "U", matrix_size__, 0, 0, 0);
322 lwork = (nb + 3) * matrix_size__ + 1024;
323 liwork = 5 * matrix_size__;
324 } else if (std::is_same<T, std::complex<float>>::value) {
325 nb = linalg_base::ilaenv(1, "CHETRD", "U", matrix_size__, 0, 0, 0);
326 lwork = (nb + 1) * matrix_size__;
327 lrwork = 7 * matrix_size__;
328 liwork = 5 * matrix_size__;
329 }
330
331 auto work = mph.get_unique_ptr<T>(lwork);
332 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
333 auto rwork = mph.get_unique_ptr<real_type<T>>(lrwork); // only required in complex
334
335 if (std::is_same<T, double>::value) {
336 FORTRAN(dsygvx)
337 (&ione, "V", "I", "U", &matrix_size__, reinterpret_cast<double*>(A__.at(sddk::memory_t::host)), &lda,
338 reinterpret_cast<double*>(B__.at(sddk::memory_t::host)), &ldb, reinterpret_cast<double*>(&vl),
339 reinterpret_cast<double*>(&vu), &ione, &nev__, reinterpret_cast<double*>(&abs_tol), &m,
340 reinterpret_cast<double*>(w.get()), reinterpret_cast<double*>(Z__.at(sddk::memory_t::host)), &ldz,
341 reinterpret_cast<double*>(work.get()), &lwork, iwork.get(), ifail.get(), &info, (ftn_int)1, (ftn_int)1,
342 (ftn_int)1);
343 } else if (std::is_same<T, std::complex<double>>::value) {
344 FORTRAN(zhegvx)
345 (&ione, "V", "I", "U", &matrix_size__, reinterpret_cast<std::complex<double>*>(A__.at(sddk::memory_t::host)),
346 &lda, reinterpret_cast<std::complex<double>*>(B__.at(sddk::memory_t::host)), &ldb,
347 reinterpret_cast<double*>(&vl), reinterpret_cast<double*>(&vu), &ione, &nev__,
348 reinterpret_cast<double*>(&abs_tol), &m, reinterpret_cast<double*>(w.get()),
349 reinterpret_cast<std::complex<double>*>(Z__.at(sddk::memory_t::host)), &ldz,
350 reinterpret_cast<std::complex<double>*>(work.get()), &lwork, reinterpret_cast<double*>(rwork.get()),
351 iwork.get(), ifail.get(), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
352 } else if (std::is_same<T, float>::value) {
353 FORTRAN(ssygvx)
354 (&ione, "V", "I", "U", &matrix_size__, reinterpret_cast<float*>(A__.at(sddk::memory_t::host)), &lda,
355 reinterpret_cast<float*>(B__.at(sddk::memory_t::host)), &ldb, reinterpret_cast<float*>(&vl),
356 reinterpret_cast<float*>(&vu), &ione, &nev__, reinterpret_cast<float*>(&abs_tol), &m,
357 reinterpret_cast<float*>(w.get()), reinterpret_cast<float*>(Z__.at(sddk::memory_t::host)), &ldz,
358 reinterpret_cast<float*>(work.get()), &lwork, iwork.get(), ifail.get(), &info, (ftn_int)1, (ftn_int)1,
359 (ftn_int)1);
360 } else if (std::is_same<T, std::complex<float>>::value) {
361 FORTRAN(chegvx)
362 (&ione, "V", "I", "U", &matrix_size__, reinterpret_cast<std::complex<float>*>(A__.at(sddk::memory_t::host)), &lda,
363 reinterpret_cast<std::complex<float>*>(B__.at(sddk::memory_t::host)), &ldb, reinterpret_cast<float*>(&vl),
364 reinterpret_cast<float*>(&vu), &ione, &nev__, reinterpret_cast<float*>(&abs_tol), &m,
365 reinterpret_cast<float*>(w.get()), reinterpret_cast<std::complex<float>*>(Z__.at(sddk::memory_t::host)), &ldz,
366 reinterpret_cast<std::complex<float>*>(work.get()), &lwork, reinterpret_cast<float*>(rwork.get()),
367 iwork.get(), ifail.get(), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
368 }
369
370 if (m != nev__) {
371 std::stringstream s;
372 s << "not all eigen-values are found" << std::endl
373 << "target number of eigen-values: " << nev__ << std::endl
374 << "number of eigen-values found: " << m;
375 RTE_WARNING(s);
376 return 1;
377 }
378
379 if (!info) {
380 std::copy(w.get(), w.get() + nev__, eval__);
381 }
382
383 return info;
384 }
385
386};
387
388#ifdef SIRIUS_ELPA
390{
391 private:
392 int stage_;
393
394 //template <typename T>
395 //void to_std(ftn_int matrix_size__, dmatrix<T>& A__, dmatrix<T>& B__, dmatrix<T>& Z__) const
396 //{
397 // PROFILE("Eigensolver_elpa|to_std");
398
399 // if (A__.num_cols_local() != Z__.num_cols_local()) {
400 // std::stringstream s;
401 // s << "number of columns in A and Z doesn't match" << std::endl
402 // << " number of cols in A (local and global): " << A__.num_cols_local() << " " << A__.num_cols()
403 // << std::endl
404 // << " number of cols in B (local and global): " << B__.num_cols_local() << " " << B__.num_cols()
405 // << std::endl
406 // << " number of cols in Z (local and global): " << Z__.num_cols_local() << " " << Z__.num_cols()
407 // << std::endl
408 // << " number of rows in A (local and global): " << A__.num_rows_local() << " " << A__.num_rows()
409 // << std::endl
410 // << " number of rows in B (local and global): " << B__.num_rows_local() << " " << B__.num_rows()
411 // << std::endl
412 // << " number of rows in Z (local and global): " << Z__.num_rows_local() << " " << Z__.num_rows()
413 // << std::endl;
414 // RTE_THROW(s);
415 // }
416 // if (A__.bs_row() != A__.bs_col()) {
417 // RTE_THROW("wrong block size");
418 // }
419
420 // /* Cholesky factorization B = U^{H}*U */
421 // linalg(linalg_t::scalapack).potrf(matrix_size__, B__.at(sddk::memory_t::host), B__.ld(), B__.descriptor());
422 // /* inversion of the triangular matrix */
423 // linalg(linalg_t::scalapack).trtri(matrix_size__, B__.at(sddk::memory_t::host), B__.ld(), B__.descriptor());
424 // /* U^{-1} is upper triangular matrix */
425 // for (int i = 0; i < matrix_size__; i++) {
426 // for (int j = i + 1; j < matrix_size__; j++) {
427 // B__.set(j, i, 0);
428 // }
429 // }
430 // /* transform to standard eigen-problem */
431 // /* A * U{-1} -> Z */
432 // linalg(linalg_t::scalapack).gemm('N', 'N', matrix_size__, matrix_size__, matrix_size__,
433 // &linalg_const<T>::one(), A__, 0, 0, B__, 0, 0, &linalg_const<T>::zero(), Z__, 0, 0);
434 // /* U^{-H} * Z = U{-H} * A * U^{-1} -> A */
435 // linalg(linalg_t::scalapack).gemm('C', 'N', matrix_size__, matrix_size__, matrix_size__,
436 // &linalg_const<T>::one(), B__, 0, 0, Z__, 0, 0, &linalg_const<T>::zero(), A__, 0, 0);
437 //}
438
439 //template <typename T>
440 //void bt(ftn_int matrix_size__, ftn_int nev__, dmatrix<T>& A__, dmatrix<T>& B__, dmatrix<T>& Z__) const
441 //{
442 // PROFILE("Eigensolver_elpa|bt");
443 // /* back-transform of eigen-vectors */
444 // linalg(linalg_t::scalapack).gemm('N', 'N', matrix_size__, nev__, matrix_size__, &linalg_const<T>::one(),
445 // B__, 0, 0, Z__, 0, 0, &linalg_const<T>::zero(), A__, 0, 0);
446 // A__ >> Z__;
447
448 //}
449 public:
450 Eigensolver_elpa(int stage__);
451 static void initialize();
452
453 static void finalize();
454 /// Solve a generalized eigen-value problem for N lowest eigen-pairs.
455 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__, dmatrix<double>& B__,
456 double* eval__, dmatrix<double>& Z__) override;
457
458 /// Solve a generalized eigen-value problem for N lowest eigen-pairs.
459 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__, dmatrix<std::complex<double>>& B__,
460 double* eval__, dmatrix<std::complex<double>>& Z__) override;
461
462 /// Solve a generalized eigen-value problem for all eigen-pairs.
463 int solve(ftn_int matrix_size__, dmatrix<double>& A__, dmatrix<double>& B__, double* eval__, dmatrix<double>& Z__) override;
464
465 /// Solve a generalized eigen-value problem for all eigen-pairs.
466 int solve(ftn_int matrix_size__, dmatrix<std::complex<double>>& A__, dmatrix<std::complex<double>>& B__, double* eval__,
467 dmatrix<std::complex<double>>& Z__) override;
468
469 /// Solve a standard eigen-value problem for N lowest eigen-pairs.
470 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__, double* eval__, dmatrix<double>& Z__) override;
471
472 /// Solve a standard eigen-value problem for N lowest eigen-pairs.
473 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__, double* eval__,
474 dmatrix<std::complex<double>>& Z__) override;
475
476 /// Solve a standard eigen-value problem for all eigen-pairs.
477 int solve(ftn_int matrix_size__, dmatrix<double>& A__, double* eval__, dmatrix<double>& Z__) override;
478 /// Solve a standard eigen-value problem for all eigen-pairs.
479 int solve(ftn_int matrix_size__, dmatrix<std::complex<double>>& A__, double* eval__, dmatrix<std::complex<double>>& Z__) override;
480};
481#else
482class Eigensolver_elpa : public Eigensolver
483{
484 public:
485 Eigensolver_elpa(int stage__)
486 : Eigensolver(ev_solver_t::elpa, true, sddk::memory_t::host, sddk::memory_t::host)
487 {
488 }
489};
490#endif
491
492#ifdef SIRIUS_SCALAPACK
494{
495 private:
496 double const ortfac_{1e-6};
497 double const abstol_{1e-12};
498
499 public:
501 : Eigensolver(ev_solver_t::scalapack, true, sddk::memory_t::host, sddk::memory_t::host)
502 {
503 }
504
505 /// Solve a standard eigen-value problem for all eigen-pairs.
506 template <typename T, typename = std::enable_if_t<!std::is_scalar<T>::value>>
507 int solve_(ftn_int matrix_size__, dmatrix<T>& A__, real_type<T>* eval__, dmatrix<T>& Z__)
508 {
509 ftn_int desca[9];
510 linalg_base::descinit(desca, matrix_size__, matrix_size__, A__.bs_row(), A__.bs_col(), 0, 0,
511 A__.blacs_grid().context(), A__.ld());
512
513 ftn_int descz[9];
514 linalg_base::descinit(descz, matrix_size__, matrix_size__, Z__.bs_row(), Z__.bs_col(), 0, 0,
515 Z__.blacs_grid().context(), Z__.ld());
516
517 ftn_int info;
518 ftn_int ione{1};
519
520 ftn_int lwork{-1};
521 ftn_int lrwork{-1};
522 ftn_int liwork{-1};
523 T work1;
524 real_type<T> rwork1;
525 ftn_int iwork1;
526
527 /* work size query */
528 if (std::is_same<T, std::complex<double>>::value) {
529 FORTRAN(pzheevd)
530 ("V", "U", &matrix_size__, reinterpret_cast<std::complex<double>*>(A__.at(sddk::memory_t::host)), &ione, &ione,
531 desca, reinterpret_cast<double*>(eval__), reinterpret_cast<std::complex<double>*>(Z__.at(sddk::memory_t::host)),
532 &ione, &ione, descz, reinterpret_cast<std::complex<double>*>(&work1), &lwork, reinterpret_cast<double*>(&rwork1),
533 &lrwork, &iwork1, &liwork, &info, (ftn_int)1, (ftn_int)1);
534 } else if (std::is_same<T, std::complex<float>>::value) {
535 FORTRAN(pcheevd)
536 ("V", "U", &matrix_size__, reinterpret_cast<std::complex<float>*>(A__.at(sddk::memory_t::host)), &ione, &ione,
537 desca, reinterpret_cast<float*>(eval__), reinterpret_cast<std::complex<float>*>(Z__.at(sddk::memory_t::host)),
538 &ione, &ione, descz, reinterpret_cast<std::complex<float>*>(&work1), &lwork, reinterpret_cast<float*>(&rwork1),
539 &lrwork, &iwork1, &liwork, &info, (ftn_int)1, (ftn_int)1);
540 }
541
542 lwork = static_cast<ftn_int>(work1.real()) + 1;
543 lrwork = static_cast<ftn_int>(rwork1) + 1;
544 liwork = iwork1;
545
546 auto& mph = get_memory_pool(sddk::memory_t::host);
547 auto work = mph.get_unique_ptr<T>(lwork);
548 auto rwork = mph.get_unique_ptr<real_type<T>>(lrwork);
549 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
550
551 if (std::is_same<T, std::complex<double>>::value) {
552 FORTRAN(pzheevd)
553 ("V", "U", &matrix_size__, reinterpret_cast<std::complex<double>*>(A__.at(sddk::memory_t::host)), &ione, &ione,
554 desca, reinterpret_cast<double*>(eval__), reinterpret_cast<std::complex<double>*>(Z__.at(sddk::memory_t::host)),
555 &ione, &ione, descz, reinterpret_cast<std::complex<double>*>(work.get()), &lwork, reinterpret_cast<double*>(rwork.get()),
556 &lrwork, iwork.get(), &liwork, &info, (ftn_int)1, (ftn_int)1);
557 } else if (std::is_same<T, std::complex<float>>::value) {
558 FORTRAN(pcheevd)
559 ("V", "U", &matrix_size__, reinterpret_cast<std::complex<float>*>(A__.at(sddk::memory_t::host)), &ione, &ione,
560 desca, reinterpret_cast<float*>(eval__), reinterpret_cast<std::complex<float>*>(Z__.at(sddk::memory_t::host)),
561 &ione, &ione, descz, reinterpret_cast<std::complex<float>*>(work.get()), &lwork, reinterpret_cast<float*>(rwork.get()),
562 &lrwork, iwork.get(), &liwork, &info, (ftn_int)1, (ftn_int)1);
563 }
564 return info;
565 }
566
567 /// wrapper for solving a standard eigen-value problem for all eigen-pairs.
568 int solve(ftn_int matrix_size__, dmatrix<std::complex<double>>& A__, double* eval__, dmatrix<std::complex<double>>& Z__) override
569 {
570 PROFILE("Eigensolver_scalapack|pzheevd");
571 return solve_(matrix_size__, A__, eval__, Z__);
572 }
573
574 int solve(ftn_int matrix_size__, dmatrix<std::complex<float>>& A__, float* eval__, dmatrix<std::complex<float>>& Z__) override
575 {
576 PROFILE("Eigensolver_scalapack|pcheevd");
577 return solve_(matrix_size__, A__, eval__, Z__);
578 }
579
580 template <typename T, typename = std::enable_if_t<std::is_scalar<T>::value>>
581 int solve_(ftn_int matrix_size__, dmatrix<T>& A__, T* eval__, dmatrix<T>& Z__)
582 {
583 ftn_int info;
584 ftn_int ione{1};
585
586 ftn_int lwork{-1};
587 ftn_int liwork{-1};
588 T work1[10];
589 ftn_int iwork1[10];
590
591 /* work size query */
592 if (std::is_same<T, double>::value) {
593 FORTRAN(pdsyevd)
594 ("V", "U", &matrix_size__, reinterpret_cast<double*>(A__.at(sddk::memory_t::host)), &ione, &ione,
595 const_cast<ftn_int*>(A__.descriptor()), reinterpret_cast<double*>(eval__),
596 reinterpret_cast<double*>(Z__.at(sddk::memory_t::host)), &ione, &ione,
597 const_cast<ftn_int*>(Z__.descriptor()), reinterpret_cast<double*>(work1), &lwork, iwork1, &liwork, &info,
598 (ftn_int)1, (ftn_int)1);
599 } else if (std::is_same<T, float>::value) {
600 FORTRAN(pssyevd)
601 ("V", "U", &matrix_size__, reinterpret_cast<float*>(A__.at(sddk::memory_t::host)), &ione, &ione,
602 const_cast<ftn_int*>(A__.descriptor()), reinterpret_cast<float*>(eval__),
603 reinterpret_cast<float*>(Z__.at(sddk::memory_t::host)), &ione, &ione,
604 const_cast<ftn_int*>(Z__.descriptor()), reinterpret_cast<float*>(work1), &lwork, iwork1, &liwork, &info,
605 (ftn_int)1, (ftn_int)1);
606 }
607
608 lwork = static_cast<ftn_int>(work1[0]) + 1;
609 liwork = iwork1[0];
610
611 auto& mph = get_memory_pool(sddk::memory_t::host);
612 auto work = mph.get_unique_ptr<T>(lwork);
613 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
614
615 if (std::is_same<T, double>::value) {
616 FORTRAN(pdsyevd)
617 ("V", "U", &matrix_size__, reinterpret_cast<double*>(A__.at(sddk::memory_t::host)), &ione, &ione,
618 const_cast<ftn_int*>(A__.descriptor()), reinterpret_cast<double*>(eval__),
619 reinterpret_cast<double*>(Z__.at(sddk::memory_t::host)), &ione, &ione,
620 const_cast<ftn_int*>(Z__.descriptor()), reinterpret_cast<double*>(work.get()), &lwork, iwork.get(),
621 &liwork, &info, (ftn_int)1, (ftn_int)1);
622 } else if (std::is_same<T, float>::value) {
623 FORTRAN(pssyevd)
624 ("V", "U", &matrix_size__, reinterpret_cast<float*>(A__.at(sddk::memory_t::host)), &ione, &ione,
625 const_cast<ftn_int*>(A__.descriptor()), reinterpret_cast<float*>(eval__),
626 reinterpret_cast<float*>(Z__.at(sddk::memory_t::host)), &ione, &ione,
627 const_cast<ftn_int*>(Z__.descriptor()), reinterpret_cast<float*>(work.get()), &lwork, iwork.get(),
628 &liwork, &info, (ftn_int)1, (ftn_int)1);
629 }
630 return info;
631 }
632
633 int solve(ftn_int matrix_size__, dmatrix<double>& A__, double* eval__, dmatrix<double>& Z__) override
634 {
635 PROFILE("Eigensolver_scalapack|pdsyevd");
636 return solve_(matrix_size__, A__, eval__, Z__);
637 }
638
639 int solve(ftn_int matrix_size__, dmatrix<float>& A__, float* eval__, dmatrix<float>& Z__) override
640 {
641 PROFILE("Eigensolver_scalapack|pssyevd");
642 return solve_(matrix_size__, A__, eval__, Z__);
643 }
644
645 /// Solve a standard eigen-value problem for N lowest eigen-pairs.
646 template <typename T, typename = std::enable_if_t<std::is_scalar<T>::value>>
647 int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix<T>& A__, T* eval__, dmatrix<T>& Z__)
648 {
649 ftn_int desca[9];
650 linalg_base::descinit(desca, matrix_size__, matrix_size__, A__.bs_row(), A__.bs_col(), 0, 0,
651 A__.blacs_grid().context(), A__.ld());
652
653 ftn_int descz[9];
654 linalg_base::descinit(descz, matrix_size__, matrix_size__, Z__.bs_row(), Z__.bs_col(), 0, 0,
655 Z__.blacs_grid().context(), Z__.ld());
656
657 ftn_int ione{1};
658
659 ftn_int m{-1};
660 ftn_int nz{-1};
661 T d1;
662 ftn_int info{-1};
663
664 auto& mph = get_memory_pool(sddk::memory_t::host);
665
666 auto ifail = mph.get_unique_ptr<ftn_int>(matrix_size__);
667 auto iclustr = mph.get_unique_ptr<ftn_int>(2 * A__.blacs_grid().comm().size());
668 auto gap = mph.get_unique_ptr<T>(A__.blacs_grid().comm().size());
669 auto w = mph.get_unique_ptr<T>(matrix_size__);
670
671 /* work size query */
672 T work3[3];
673 ftn_int iwork1;
674 ftn_int lwork{-1};
675 ftn_int liwork{-1};
676
677 if (std::is_same<T, double>::value) {
678 FORTRAN(pdsyevx)
679 ("V", "I", "U", &matrix_size__, reinterpret_cast<double*>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
680 reinterpret_cast<double*>(&d1), reinterpret_cast<double*>(&d1), &ione, &nev__, &abstol_, &m, &nz,
681 reinterpret_cast<double*>(w.get()), &ortfac_, reinterpret_cast<double*>(Z__.at(sddk::memory_t::host)), &ione, &ione,
682 descz, reinterpret_cast<double*>(work3), &lwork, &iwork1, &liwork, ifail.get(), iclustr.get(),
683 reinterpret_cast<double*>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
684 } else if (std::is_same<T, float>::value) {
685 FORTRAN(pssyevx)
686 ("V", "I", "U", &matrix_size__, reinterpret_cast<float*>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
687 reinterpret_cast<float*>(&d1), reinterpret_cast<float*>(&d1), &ione, &nev__, &abstol_, &m, &nz,
688 reinterpret_cast<float*>(w.get()), &ortfac_, reinterpret_cast<float*>(Z__.at(sddk::memory_t::host)), &ione, &ione,
689 descz, reinterpret_cast<float*>(work3), &lwork, &iwork1, &liwork, ifail.get(), iclustr.get(),
690 reinterpret_cast<float*>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
691 }
692
693 lwork = static_cast<ftn_int>(work3[0]) + 4 * (1 << 20);
694 liwork = iwork1;
695
696 auto work = mph.get_unique_ptr<T>(lwork);
697 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
698
699 if (std::is_same<T, double>::value) {
700 FORTRAN(pdsyevx)
701 ("V", "I", "U", &matrix_size__, reinterpret_cast<double*>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
702 reinterpret_cast<double*>(&d1), reinterpret_cast<double*>(&d1), &ione, &nev__, &abstol_, &m, &nz,
703 reinterpret_cast<double*>(w.get()), &ortfac_, reinterpret_cast<double*>(Z__.at(sddk::memory_t::host)), &ione, &ione,
704 descz, reinterpret_cast<double*>(work.get()), &lwork, iwork.get(), &liwork, ifail.get(), iclustr.get(),
705 reinterpret_cast<double*>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
706 } else if (std::is_same<T, float>::value) {
707 FORTRAN(pssyevx)
708 ("V", "I", "U", &matrix_size__, reinterpret_cast<float*>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
709 reinterpret_cast<float*>(&d1), reinterpret_cast<float*>(&d1), &ione, &nev__, &abstol_, &m, &nz,
710 reinterpret_cast<float*>(w.get()), &ortfac_, reinterpret_cast<float*>(Z__.at(sddk::memory_t::host)), &ione, &ione,
711 descz, reinterpret_cast<float*>(work.get()), &lwork, iwork.get(), &liwork, ifail.get(), iclustr.get(),
712 reinterpret_cast<float*>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
713 }
714
715 if ((m != nev__) || (nz != nev__)) {
716 RTE_WARNING("Not all eigen-vectors or eigen-values are found.");
717 return 1;
718 }
719
720 if (info) {
721 if ((info / 2) % 2) {
722 std::stringstream s;
723 s << "eigenvectors corresponding to one or more clusters of eigenvalues" << std::endl
724 << "could not be reorthogonalized because of insufficient workspace" << std::endl;
725
726 int k = A__.blacs_grid().comm().size();
727 for (int i = 0; i < A__.blacs_grid().comm().size() - 1; i++) {
728 if ((iclustr.get()[2 * i + 1] != 0) && (iclustr.get()[2 * (i + 1)] == 0)) {
729 k = i + 1;
730 break;
731 }
732 }
733
734 s << "number of eigenvalue clusters : " << k << std::endl;
735 for (int i = 0; i < k; i++) {
736 s << iclustr.get()[2 * i] << " : " << iclustr.get()[2 * i + 1] << std::endl;
737 }
738 RTE_WARNING(s);
739 }
740
741 std::stringstream s;
742 if (std::is_same<T, double>::value) {
743 s << "pdsyevx returned " << info;
744 } else if (std::is_same<T, float>::value) {
745 s << "pssyevx returned " << info;
746 }
747 RTE_WARNING(s);
748 } else {
749 std::copy(w.get(), w.get() + nev__, eval__);
750 }
751
752 return info;
753 }
754
755 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__, double* eval__, dmatrix<double>& Z__) override
756 {
757 PROFILE("Eigensolver_scalapack|pdsyevx");
758 return solve_(matrix_size__, nev__, A__, eval__, Z__);
759 }
760
761 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<float>& A__, float* eval__, dmatrix<float>& Z__) override
762 {
763 PROFILE("Eigensolver_scalapack|pssyevx");
764 return solve_(matrix_size__, nev__, A__, eval__, Z__);
765 }
766
767 /// Solve a standard eigen-value problem for N lowest eigen-pairs.
768 template <typename T, typename = std::enable_if_t<!std::is_scalar<T>::value>>
769 int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix<T>& A__, real_type<T>* eval__, dmatrix<T>& Z__)
770 {
771 ftn_int desca[9];
772 linalg_base::descinit(desca, matrix_size__, matrix_size__, A__.bs_row(), A__.bs_col(), 0, 0,
773 A__.blacs_grid().context(), A__.ld());
774
775 ftn_int descz[9];
776 linalg_base::descinit(descz, matrix_size__, matrix_size__, Z__.bs_row(), Z__.bs_col(), 0, 0,
777 Z__.blacs_grid().context(), Z__.ld());
778
779 ftn_int ione{1};
780
781 ftn_int m{-1};
782 ftn_int nz{-1};
783 real_type<T> d1;
784 ftn_int info{-1};
785
786 auto& mph = get_memory_pool(sddk::memory_t::host);
787
788 auto ifail = mph.get_unique_ptr<ftn_int>(matrix_size__);
789 auto iclustr = mph.get_unique_ptr<ftn_int>(2 * A__.blacs_grid().comm().size());
790 auto gap = mph.get_unique_ptr<real_type<T>>(A__.blacs_grid().comm().size());
791 auto w = mph.get_unique_ptr<real_type<T>>(matrix_size__);
792
793 /* work size query */
794 T work3[3];
795 real_type<T> rwork3[3];
796 ftn_int iwork1;
797 ftn_int lwork = -1;
798 ftn_int lrwork = -1;
799 ftn_int liwork = -1;
800
801 if (std::is_same<T, std::complex<double>>::value) {
802 FORTRAN(pzheevx)
803 ("V", "I", "U", &matrix_size__, reinterpret_cast<std::complex<double>*>(A__.at(sddk::memory_t::host)), &ione, &ione,
804 desca, reinterpret_cast<double*>(&d1), reinterpret_cast<double*>(&d1), &ione, &nev__, &abstol_, &m, &nz,
805 reinterpret_cast<double*>(w.get()), &ortfac_, reinterpret_cast<std::complex<double>*>(Z__.at(sddk::memory_t::host)),
806 &ione, &ione, descz, reinterpret_cast<std::complex<double>*>(work3), &lwork, reinterpret_cast<double*>(rwork3),
807 &lrwork, &iwork1, &liwork, ifail.get(), iclustr.get(), reinterpret_cast<double*>(gap.get()), &info, (ftn_int)1,
808 (ftn_int)1, (ftn_int)1);
809 } else if (std::is_same<T, std::complex<float>>::value) {
810 FORTRAN(pcheevx)
811 ("V", "I", "U", &matrix_size__, reinterpret_cast<std::complex<float>*>(A__.at(sddk::memory_t::host)), &ione, &ione,
812 desca, reinterpret_cast<float*>(&d1), reinterpret_cast<float*>(&d1), &ione, &nev__, &abstol_, &m, &nz,
813 reinterpret_cast<float*>(w.get()), &ortfac_, reinterpret_cast<std::complex<float>*>(Z__.at(sddk::memory_t::host)),
814 &ione, &ione, descz, reinterpret_cast<std::complex<float>*>(work3), &lwork, reinterpret_cast<float*>(rwork3),
815 &lrwork, &iwork1, &liwork, ifail.get(), iclustr.get(), reinterpret_cast<float*>(gap.get()), &info, (ftn_int)1,
816 (ftn_int)1, (ftn_int)1);
817 }
818
819 lwork = static_cast<int32_t>(work3[0].real()) + (1 << 16);
820 lrwork = static_cast<int32_t>(rwork3[0]) + (1 << 16);
821 liwork = iwork1;
822
823 auto work = mph.get_unique_ptr<T>(lwork);
824 auto rwork = mph.get_unique_ptr<real_type<T>>(lrwork);
825 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
826
827 if (std::is_same<T, std::complex<double>>::value) {
828 FORTRAN(pzheevx)
829 ("V", "I", "U", &matrix_size__, reinterpret_cast<std::complex<double>*>(A__.at(sddk::memory_t::host)), &ione, &ione,
830 desca, reinterpret_cast<double*>(&d1), reinterpret_cast<double*>(&d1), &ione, &nev__, &abstol_, &m, &nz,
831 reinterpret_cast<double*>(w.get()), &ortfac_, reinterpret_cast<std::complex<double>*>(Z__.at(sddk::memory_t::host)),
832 &ione, &ione, descz, reinterpret_cast<std::complex<double>*>(work.get()), &lwork, reinterpret_cast<double*>(rwork.get()),
833 &lrwork, iwork.get(), &liwork, ifail.get(), iclustr.get(), reinterpret_cast<double*>(gap.get()), &info,
834 (ftn_int)1, (ftn_int)1, (ftn_int)1);
835 } else if (std::is_same<T, std::complex<float>>::value) {
836 FORTRAN(pcheevx)
837 ("V", "I", "U", &matrix_size__, reinterpret_cast<std::complex<float>*>(A__.at(sddk::memory_t::host)), &ione, &ione,
838 desca, reinterpret_cast<float*>(&d1), reinterpret_cast<float*>(&d1), &ione, &nev__, &abstol_, &m, &nz,
839 reinterpret_cast<float*>(w.get()), &ortfac_, reinterpret_cast<std::complex<float>*>(Z__.at(sddk::memory_t::host)),
840 &ione, &ione, descz, reinterpret_cast<std::complex<float>*>(work.get()), &lwork, reinterpret_cast<float*>(rwork.get()),
841 &lrwork, iwork.get(), &liwork, ifail.get(), iclustr.get(), reinterpret_cast<float*>(gap.get()), &info,
842 (ftn_int)1, (ftn_int)1, (ftn_int)1);
843 }
844
845 if ((m != nev__) || (nz != nev__)) {
846 RTE_WARNING("Not all eigen-vectors or eigen-values are found.");
847 return 1;
848 }
849
850 if (info) {
851 if ((info / 2) % 2) {
852 std::stringstream s;
853 s << "eigenvectors corresponding to one or more clusters of eigenvalues" << std::endl
854 << "could not be reorthogonalized because of insufficient workspace" << std::endl;
855
856 int k = A__.blacs_grid().comm().size();
857 for (int i = 0; i < A__.blacs_grid().comm().size() - 1; i++) {
858 if ((iclustr.get()[2 * i + 1] != 0) && (iclustr.get()[2 * (i + 1)] == 0)) {
859 k = i + 1;
860 break;
861 }
862 }
863
864 s << "number of eigenvalue clusters : " << k << std::endl;
865 for (int i = 0; i < k; i++) {
866 s << iclustr.get()[2 * i] << " : " << iclustr.get()[2 * i + 1] << std::endl;
867 }
868 RTE_WARNING(s);
869 }
870
871 std::stringstream s;
872 if (std::is_same<T, std::complex<double>>::value) {
873 s << "pzheevx returned " << info;
874 } else if (std::is_same<T, std::complex<float>>::value) {
875 s << "pcheevx returned " << info;
876 }
877 RTE_WARNING(s);
878 } else {
879 std::copy(w.get(), w.get() + nev__, eval__);
880 }
881
882 return info;
883 }
884
885 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__, double* eval__, dmatrix<std::complex<double>>& Z__) override
886 {
887 PROFILE("Eigensolver_scalapack|pzheevx");
888 return solve_(matrix_size__, nev__, A__, eval__, Z__);
889 }
890
891 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<float>>& A__, float* eval__, dmatrix<std::complex<float>>& Z__) override
892 {
893 PROFILE("Eigensolver_scalapack|pcheevx");
894 return solve_(matrix_size__, nev__, A__, eval__, Z__);
895 }
896
897 /// Solve a generalized eigen-value problem for N lowest eigen-pairs.
898 template <typename T, typename = std::enable_if_t<std::is_scalar<T>::value>>
899 int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix<T>& A__, dmatrix<T>& B__, T* eval__, dmatrix<T>& Z__)
900 {
901 ftn_int desca[9];
902 linalg_base::descinit(desca, matrix_size__, matrix_size__, A__.bs_row(), A__.bs_col(), 0, 0,
903 A__.blacs_grid().context(), A__.ld());
904
905 ftn_int descb[9];
906 linalg_base::descinit(descb, matrix_size__, matrix_size__, B__.bs_row(), B__.bs_col(), 0, 0,
907 B__.blacs_grid().context(), B__.ld());
908
909 ftn_int descz[9];
910 linalg_base::descinit(descz, matrix_size__, matrix_size__, Z__.bs_row(), Z__.bs_col(), 0, 0,
911 Z__.blacs_grid().context(), Z__.ld());
912
913 auto& mph = get_memory_pool(sddk::memory_t::host);
914 auto ifail = mph.get_unique_ptr<ftn_int>(matrix_size__);
915 auto iclustr = mph.get_unique_ptr<ftn_int>(2 * A__.blacs_grid().comm().size());
916 auto gap = mph.get_unique_ptr<T>(A__.blacs_grid().comm().size());
917 auto w = mph.get_unique_ptr<T>(matrix_size__);
918
919 ftn_int ione{1};
920
921 ftn_int m{-1};
922 ftn_int nz{-1};
923 T d1;
924 ftn_int info{-1};
925
926 T work1[3];
927 ftn_int lwork = -1;
928 ftn_int liwork = -1;
929 /* work size query */
930 if (std::is_same<T, double>::value) {
931 FORTRAN(pdsygvx)
932 (&ione, "V", "I", "U", &matrix_size__, reinterpret_cast<double*>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
933 reinterpret_cast<double*>(B__.at(sddk::memory_t::host)), &ione, &ione, descb, reinterpret_cast<double*>(&d1),
934 reinterpret_cast<double*>(&d1), &ione, &nev__, &abstol_, &m, &nz, reinterpret_cast<double*>(w.get()), &ortfac_,
935 reinterpret_cast<double*>(Z__.at(sddk::memory_t::host)), &ione, &ione, descz, reinterpret_cast<double*>(work1), &lwork, &liwork, &lwork,
936 ifail.get(), iclustr.get(), reinterpret_cast<double*>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
937 } else if (std::is_same<T, float>::value) {
938 FORTRAN(pssygvx)
939 (&ione, "V", "I", "U", &matrix_size__, reinterpret_cast<float*>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
940 reinterpret_cast<float*>(B__.at(sddk::memory_t::host)), &ione, &ione, descb, reinterpret_cast<float*>(&d1),
941 reinterpret_cast<float*>(&d1), &ione, &nev__, &abstol_, &m, &nz, reinterpret_cast<float*>(w.get()), &ortfac_,
942 reinterpret_cast<float*>(Z__.at(sddk::memory_t::host)), &ione, &ione, descz, reinterpret_cast<float*>(work1), &lwork, &liwork, &lwork,
943 ifail.get(), iclustr.get(), reinterpret_cast<float*>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
944 }
945
946 lwork = static_cast<int32_t>(work1[0]) + 4 * (1 << 20);
947
948 auto work = mph.get_unique_ptr<T>(lwork);
949 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
950
951 if (std::is_same<T, double>::value) {
952 FORTRAN(pdsygvx)
953 (&ione, "V", "I", "U", &matrix_size__, reinterpret_cast<double*>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
954 reinterpret_cast<double*>(B__.at(sddk::memory_t::host)), &ione, &ione, descb, reinterpret_cast<double*>(&d1),
955 reinterpret_cast<double*>(&d1), &ione, &nev__, &abstol_, &m, &nz, reinterpret_cast<double*>(w.get()), &ortfac_,
956 reinterpret_cast<double*>(Z__.at(sddk::memory_t::host)), &ione, &ione, descz, reinterpret_cast<double*>(work.get()),
957 &lwork, iwork.get(), &liwork, ifail.get(), iclustr.get(), reinterpret_cast<double*>(gap.get()), &info,
958 (ftn_int)1, (ftn_int)1, (ftn_int)1);
959 } else if (std::is_same<T, float>::value) {
960 FORTRAN(pssygvx)
961 (&ione, "V", "I", "U", &matrix_size__, reinterpret_cast<float*>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
962 reinterpret_cast<float*>(B__.at(sddk::memory_t::host)), &ione, &ione, descb, reinterpret_cast<float*>(&d1),
963 reinterpret_cast<float*>(&d1), &ione, &nev__, &abstol_, &m, &nz, reinterpret_cast<float*>(w.get()), &ortfac_,
964 reinterpret_cast<float*>(Z__.at(sddk::memory_t::host)), &ione, &ione, descz, reinterpret_cast<float*>(work.get()),
965 &lwork, iwork.get(), &liwork, ifail.get(), iclustr.get(), reinterpret_cast<float*>(gap.get()), &info,
966 (ftn_int)1, (ftn_int)1, (ftn_int)1);
967 }
968
969 if ((m != nev__) || (nz != nev__)) {
970 RTE_WARNING("Not all eigen-vectors or eigen-values are found.");
971 return 1;
972 }
973
974 if (info) {
975 if ((info / 2) % 2) {
976 std::stringstream s;
977 s << "eigenvectors corresponding to one or more clusters of eigenvalues" << std::endl
978 << "could not be reorthogonalized because of insufficient workspace" << std::endl;
979
980 int k = A__.blacs_grid().comm().size();
981 for (int i = 0; i < A__.blacs_grid().comm().size() - 1; i++) {
982 if ((iclustr.get()[2 * i + 1] != 0) && (iclustr.get()[2 * (i + 1)] == 0)) {
983 k = i + 1;
984 break;
985 }
986 }
987
988 s << "number of eigenvalue clusters : " << k << std::endl;
989 for (int i = 0; i < k; i++) {
990 s << iclustr.get()[2 * i] << " : " << iclustr.get()[2 * i + 1] << std::endl;
991 }
992 RTE_WARNING(s);
993 }
994
995 std::stringstream s;
996 if (std::is_same<T, double>::value) {
997 s << "pdsygvx returned " << info;
998 } else if (std::is_same<T, float>::value) {
999 s << "pssygvx returned " << info;
1000 }
1001 RTE_WARNING(s);
1002 } else {
1003 std::copy(w.get(), w.get() + nev__, eval__);
1004 }
1005
1006 return info;
1007 }
1008
1009 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__, dmatrix<double>& B__, double* eval__, dmatrix<double>& Z__) override
1010 {
1011 PROFILE("Eigensolver_scalapack|pdsygvx");
1012 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1013 }
1014
1015 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<float>& A__, dmatrix<float>& B__, float* eval__, dmatrix<float>& Z__) override
1016 {
1017 PROFILE("Eigensolver_scalapack|pssygvx");
1018 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1019 }
1020
1021 /// Solve a generalized eigen-value problem for N lowest eigen-pairs.
1022 template <typename T, typename = std::enable_if_t<!std::is_scalar<T>::value>>
1023 int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix<T>& A__, dmatrix<T>& B__, real_type<T>* eval__, dmatrix<T>& Z__)
1024 {
1025 ftn_int desca[9];
1026 linalg_base::descinit(desca, matrix_size__, matrix_size__, A__.bs_row(), A__.bs_col(), 0, 0,
1027 A__.blacs_grid().context(), A__.ld());
1028
1029 ftn_int descb[9];
1030 linalg_base::descinit(descb, matrix_size__, matrix_size__, B__.bs_row(), B__.bs_col(), 0, 0,
1031 B__.blacs_grid().context(), B__.ld());
1032
1033 ftn_int descz[9];
1034 linalg_base::descinit(descz, matrix_size__, matrix_size__, Z__.bs_row(), Z__.bs_col(), 0, 0,
1035 Z__.blacs_grid().context(), Z__.ld());
1036
1037 auto& mph = get_memory_pool(sddk::memory_t::host);
1038
1039 auto ifail = mph.get_unique_ptr<ftn_int>(matrix_size__);
1040 auto iclustr = mph.get_unique_ptr<ftn_int>(2 * A__.blacs_grid().comm().size());
1041 auto gap = mph.get_unique_ptr<real_type<T>>(A__.blacs_grid().comm().size());
1042 auto w = mph.get_unique_ptr<real_type<T>>(matrix_size__);
1043
1044 ftn_int ione{1};
1045
1046 ftn_int m{-1};
1047 ftn_int nz{-1};
1048 real_type<T> d1;
1049 ftn_int info{-1};
1050
1051 ftn_int lwork{-1};
1052 ftn_int lrwork{-1};
1053 ftn_int liwork{-1};
1054
1055 T work1;
1056 real_type<T> rwork3[3];
1057 ftn_int iwork1;
1058
1059 /* work size query */
1060 if (std::is_same<T, std::complex<double>>::value) {
1061 FORTRAN(pzhegvx)
1062 (&ione, "V", "I", "U", &matrix_size__,
1063 reinterpret_cast<std::complex<double>*>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
1064 reinterpret_cast<std::complex<double>*>(B__.at(sddk::memory_t::host)), &ione, &ione, descb,
1065 reinterpret_cast<double*>(&d1), reinterpret_cast<double*>(&d1), &ione, &nev__, &abstol_, &m, &nz,
1066 reinterpret_cast<double*>(w.get()), &ortfac_,
1067 reinterpret_cast<std::complex<double>*>(Z__.at(sddk::memory_t::host)), &ione, &ione, descz,
1068 reinterpret_cast<std::complex<double>*>(&work1), &lwork, reinterpret_cast<double*>(rwork3),
1069 &lrwork, &iwork1, &liwork, ifail.get(), iclustr.get(), reinterpret_cast<double*>(gap.get()),
1070 &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
1071 } else if (std::is_same<T, std::complex<float>>::value) {
1072 FORTRAN(pchegvx)
1073 (&ione, "V", "I", "U", &matrix_size__,
1074 reinterpret_cast<std::complex<float>*>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
1075 reinterpret_cast<std::complex<float>*>(B__.at(sddk::memory_t::host)), &ione, &ione, descb,
1076 reinterpret_cast<float*>(&d1), reinterpret_cast<float*>(&d1), &ione, &nev__, &abstol_, &m, &nz,
1077 reinterpret_cast<float*>(w.get()), &ortfac_,
1078 reinterpret_cast<std::complex<float>*>(Z__.at(sddk::memory_t::host)), &ione, &ione, descz,
1079 reinterpret_cast<std::complex<float>*>(&work1), &lwork, reinterpret_cast<float*>(rwork3), &lrwork, &iwork1,
1080 &liwork, ifail.get(), iclustr.get(), reinterpret_cast<float*>(gap.get()), &info, (ftn_int)1,
1081 (ftn_int)1, (ftn_int)1);
1082 }
1083
1084 lwork = 2 * static_cast<int32_t>(work1.real()) + 4096;
1085 lrwork = 2 * static_cast<int32_t>(rwork3[0]) + 4096;
1086 liwork = 2 * iwork1 + 4096;
1087
1088 auto work = mph.get_unique_ptr<T>(lwork);
1089 auto rwork = mph.get_unique_ptr<real_type<T>>(lrwork);
1090 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
1091
1092 if (std::is_same<T, std::complex<double>>::value) {
1093 FORTRAN(pzhegvx)
1094 (&ione, "V", "I", "U", &matrix_size__, reinterpret_cast<std::complex<double>*>(A__.at(sddk::memory_t::host)), &ione,
1095 &ione, desca, reinterpret_cast<std::complex<double>*>(B__.at(sddk::memory_t::host)), &ione, &ione, descb,
1096 reinterpret_cast<double*>(&d1), reinterpret_cast<double*>(&d1), &ione, &nev__, &abstol_, &m, &nz,
1097 reinterpret_cast<double*>(w.get()), &ortfac_, reinterpret_cast<std::complex<double>*>(Z__.at(sddk::memory_t::host)),
1098 &ione, &ione, descz, reinterpret_cast<std::complex<double>*>(work.get()), &lwork,
1099 reinterpret_cast<double*>(rwork.get()), &lrwork, iwork.get(), &liwork, ifail.get(),
1100 iclustr.get(), reinterpret_cast<double*>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
1101 } else if (std::is_same<T, std::complex<float>>::value) {
1102 FORTRAN(pchegvx)
1103 (&ione, "V", "I", "U", &matrix_size__, reinterpret_cast<std::complex<float>*>(A__.at(sddk::memory_t::host)), &ione,
1104 &ione, desca, reinterpret_cast<std::complex<float>*>(B__.at(sddk::memory_t::host)), &ione, &ione, descb,
1105 reinterpret_cast<float*>(&d1), reinterpret_cast<float*>(&d1), &ione, &nev__, &abstol_, &m, &nz,
1106 reinterpret_cast<float*>(w.get()), &ortfac_, reinterpret_cast<std::complex<float>*>(Z__.at(sddk::memory_t::host)),
1107 &ione, &ione, descz, reinterpret_cast<std::complex<float>*>(work.get()), &lwork,
1108 reinterpret_cast<float*>(rwork.get()), &lrwork, iwork.get(), &liwork, ifail.get(),
1109 iclustr.get(), reinterpret_cast<float*>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
1110 }
1111
1112
1113 if ((m != nev__) || (nz != nev__)) {
1114 RTE_WARNING("Not all eigen-vectors or eigen-values are found.");
1115 return 1;
1116 }
1117
1118 if (info) {
1119 if ((info / 2) % 2) {
1120 std::stringstream s;
1121 s << "eigenvectors corresponding to one or more clusters of eigenvalues" << std::endl
1122 << "could not be reorthogonalized because of insufficient workspace" << std::endl;
1123
1124 int k = A__.blacs_grid().comm().size();
1125 for (int i = 0; i < A__.blacs_grid().comm().size() - 1; i++) {
1126 if ((iclustr.get()[2 * i + 1] != 0) && (iclustr.get()[2 * (i + 1)] == 0)) {
1127 k = i + 1;
1128 break;
1129 }
1130 }
1131
1132 s << "number of eigenvalue clusters : " << k << std::endl;
1133 for (int i = 0; i < k; i++) {
1134 s << iclustr.get()[2 * i] << " : " << iclustr.get()[2 * i + 1] << std::endl;
1135 }
1136 RTE_WARNING(s);
1137 }
1138
1139 std::stringstream s;
1140 if (std::is_same<T, std::complex<double>>::value) {
1141 s << "pzhegvx returned " << info;
1142 } else if (std::is_same<T, std::complex<float>>::value) {
1143 s << "pchegvx returned " << info;
1144 }
1145 RTE_WARNING(s);
1146 } else {
1147 std::copy(w.get(), w.get() + nev__, eval__);
1148 }
1149
1150 return info;
1151 }
1152
1153 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__, dmatrix<std::complex<double>>& B__,
1154 double* eval__, dmatrix<std::complex<double>>& Z__) override
1155 {
1156 PROFILE("Eigensolver_scalapack|pzhegvx");
1157 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1158 }
1159
1160 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<float>>& A__, dmatrix<std::complex<float>>& B__,
1161 float* eval__, dmatrix<std::complex<float>>& Z__) override
1162 {
1163 PROFILE("Eigensolver_scalapack|pchegvx");
1164 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1165 }
1166};
1167#else
1168class Eigensolver_scalapack : public Eigensolver
1169{
1170 public:
1171 Eigensolver_scalapack()
1172 : Eigensolver(ev_solver_t::scalapack, true, sddk::memory_t::host, sddk::memory_t::host)
1173 {
1174 }
1175};
1176#endif
1177
1178#ifdef SIRIUS_DLAF
1179class Eigensolver_dlaf : public Eigensolver
1180{
1181 public:
1182 Eigensolver_dlaf()
1183 : Eigensolver(ev_solver_t::dlaf, true, sddk::memory_t::host, sddk::memory_t::host)
1184 {
1185 }
1186
1187 static void initialize();
1188 static void finalize();
1189
1190 /// Solve a standard eigen-value problem for all eigen-pairs.
1191 //template <typename T, typename = std::enable_if_t<!std::is_scalar<T>::value>>
1192 template <typename T>
1193 int solve_(ftn_int matrix_size__, dmatrix<T>& A__, real_type<T>* eval__, dmatrix<T>& Z__)
1194 {
1195 DLAF_descriptor desca{matrix_size__, matrix_size__, A__.bs_row(), A__.bs_col(), 0, 0, 0, 0, static_cast<int>(A__.ld())};
1196 DLAF_descriptor descz{matrix_size__, matrix_size__, Z__.bs_row(), Z__.bs_col(), 0, 0, 0, 0, static_cast<int>(Z__.ld())};
1197
1198 if (std::is_same_v<T, std::complex<double>>) {
1199 return dlaf_hermitian_eigensolver_z(A__.blacs_grid().context(), 'L', reinterpret_cast<std::complex<double>*>(A__.at(sddk::memory_t::host)), desca, reinterpret_cast<double*>(eval__), reinterpret_cast<std::complex<double>*>(Z__.at(sddk::memory_t::host)), descz);
1200 } else if (std::is_same_v<T, std::complex<float>>) {
1201 return dlaf_hermitian_eigensolver_c(A__.blacs_grid().context(), 'L', reinterpret_cast<std::complex<float>*>(A__.at(sddk::memory_t::host)), desca, reinterpret_cast<float*>(eval__), reinterpret_cast<std::complex<float>*>(Z__.at(sddk::memory_t::host)), descz);
1202 }
1203 else if (std::is_same_v<T, double>){
1204 return dlaf_symmetric_eigensolver_d(A__.blacs_grid().context(), 'L', reinterpret_cast<double*>(A__.at(sddk::memory_t::host)), desca, reinterpret_cast<double*>(eval__), reinterpret_cast<double*>(Z__.at(sddk::memory_t::host)), descz);
1205 } else if (std::is_same_v<T, float>){
1206 return dlaf_symmetric_eigensolver_s(A__.blacs_grid().context(), 'L', reinterpret_cast<float*>(A__.at(sddk::memory_t::host)), desca, reinterpret_cast<float*>(eval__), reinterpret_cast<float*>(Z__.at(sddk::memory_t::host)), descz);
1207 }
1208 }
1209
1210 /// wrapper for solving a standard eigen-value problem for all eigen-pairs.
1211 int solve(ftn_int matrix_size__, dmatrix<std::complex<double>>& A__, double* eval__, dmatrix<std::complex<double>>& Z__) override
1212 {
1213 PROFILE("Eigensolver_dlaf|dlaf_hermitian_eigensolver_z");
1214 return solve_(matrix_size__, A__, eval__, Z__);
1215 }
1216
1217 int solve(ftn_int matrix_size__, dmatrix<std::complex<float>>& A__, float* eval__, dmatrix<std::complex<float>>& Z__) override
1218 {
1219 PROFILE("Eigensolver_dlaf|dlaf_hermitian_eigensolver_c");
1220 return solve_(matrix_size__, A__, eval__, Z__);
1221 }
1222
1223 int solve(ftn_int matrix_size__, dmatrix<double>& A__, double* eval__, dmatrix<double>& Z__) override
1224 {
1225 PROFILE("Eigensolver_dlaf|dlaf_symmetric_eigensolver_d");
1226 return solve_(matrix_size__, A__, eval__, Z__);
1227 }
1228
1229 int solve(ftn_int matrix_size__, dmatrix<float>& A__, float* eval__, dmatrix<float>& Z__) override
1230 {
1231 PROFILE("Eigensolver_dlaf|dlaf_symmetric_eigensolver_s");
1232 return solve_(matrix_size__, A__, eval__, Z__);
1233 }
1234
1235 /// Solve a standard eigen-value problem for N lowest eigen-pairs.
1236 template <typename T>
1237 int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix<T>& A__, real_type<T>* eval__, dmatrix<T>& Z__)
1238 {
1239 auto& mph = get_memory_pool(sddk::memory_t::host);
1240 auto w = mph.get_unique_ptr<real_type<T>>(matrix_size__);
1241
1242 auto info = solve_(matrix_size__, A__, w.get(), Z__);
1243
1244 std::copy(w.get(), w.get() + nev__, eval__);
1245
1246 return info;
1247 }
1248
1249 /// wrapper for solving a standard eigen-value problem for N lowest eigen-pairs.
1250 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__, double* eval__, dmatrix<std::complex<double>>& Z__) override
1251 {
1252 PROFILE("Eigensolver_dlaf|dlaf_hermitian_eigensolver_z");
1253 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1254 }
1255
1256 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<float>>& A__, float* eval__, dmatrix<std::complex<float>>& Z__) override
1257 {
1258 PROFILE("Eigensolver_dlaf|dlaf_hermitian_eigensolver_c");
1259 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1260 }
1261
1262 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__, double* eval__, dmatrix<double>& Z__) override
1263 {
1264 PROFILE("Eigensolver_dlaf|dlaf_symmetric_eigensolver_d");
1265 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1266 }
1267
1268 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<float>& A__, float* eval__, dmatrix<float>& Z__) override
1269 {
1270 PROFILE("Eigensolver_dlaf|dlaf_symmetric_eigensolver_s");
1271 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1272 }
1273
1274 /// Solve a generalized eigen-value problem for all eigen-pairs.
1275 template <typename T>
1276 int solve_(ftn_int matrix_size__, dmatrix<T>& A__, dmatrix<T>& B__, real_type<T>* eval__, dmatrix<T>& Z__){
1277 DLAF_descriptor desca{matrix_size__, matrix_size__, A__.bs_row(), A__.bs_col(), 0, 0, 0, 0, static_cast<int>(A__.ld())};
1278 DLAF_descriptor descb{matrix_size__, matrix_size__, B__.bs_row(), B__.bs_col(), 0, 0, 0, 0, static_cast<int>(B__.ld())};
1279 DLAF_descriptor descz{matrix_size__, matrix_size__, Z__.bs_row(), Z__.bs_col(), 0, 0, 0, 0, static_cast<int>(Z__.ld())};
1280
1281 if (std::is_same_v<T, std::complex<double>>) {
1282 return dlaf_hermitian_generalized_eigensolver_z(A__.blacs_grid().context(), 'L', reinterpret_cast<std::complex<double>*>(A__.at(sddk::memory_t::host)), desca, reinterpret_cast<std::complex<double>*>(B__.at(sddk::memory_t::host)), descb, reinterpret_cast<double*>(eval__), reinterpret_cast<std::complex<double>*>(Z__.at(sddk::memory_t::host)), descz);
1283 } else if (std::is_same_v<T, std::complex<float>>) {
1284 return dlaf_hermitian_generalized_eigensolver_c(A__.blacs_grid().context(), 'L', reinterpret_cast<std::complex<float>*>(A__.at(sddk::memory_t::host)), desca, reinterpret_cast<std::complex<float>*>(B__.at(sddk::memory_t::host)), descb, reinterpret_cast<float*>(eval__), reinterpret_cast<std::complex<float>*>(Z__.at(sddk::memory_t::host)), descz);
1285 }
1286 else if (std::is_same_v<T, double>){
1287 return dlaf_symmetric_generalized_eigensolver_d(A__.blacs_grid().context(), 'L', reinterpret_cast<double*>(A__.at(sddk::memory_t::host)), desca, reinterpret_cast<double*>(B__.at(sddk::memory_t::host)), descb, reinterpret_cast<double*>(eval__), reinterpret_cast<double*>(Z__.at(sddk::memory_t::host)), descz);
1288 } else if (std::is_same_v<T, float>){
1289 return dlaf_symmetric_generalized_eigensolver_s(A__.blacs_grid().context(), 'L', reinterpret_cast<float*>(A__.at(sddk::memory_t::host)), desca, reinterpret_cast<float*>(A__.at(sddk::memory_t::host)), descb, reinterpret_cast<float*>(eval__), reinterpret_cast<float*>(Z__.at(sddk::memory_t::host)), descz);
1290 }
1291 }
1292
1293 /// wrapper for solving a generalized eigen-value problem for all eigen-pairs.
1294 int solve(ftn_int matrix_size__, dmatrix<std::complex<double>>& A__, dmatrix<std::complex<double>>& B__, double* eval__, dmatrix<std::complex<double>>& Z__) override
1295 {
1296 PROFILE("Eigensolver_dlaf|dlaf_hermitian_generalized_eigensolver_z");
1297 return solve_(matrix_size__, A__, B__, eval__, Z__);
1298 }
1299
1300 int solve(ftn_int matrix_size__, dmatrix<std::complex<float>>& A__, dmatrix<std::complex<float>>& B__, float* eval__, dmatrix<std::complex<float>>& Z__) override
1301 {
1302 PROFILE("Eigensolver_dlaf|dlaf_hermitian_generalized_eigensolver_c");
1303 return solve_(matrix_size__, A__, B__, eval__, Z__);
1304 }
1305
1306 int solve(ftn_int matrix_size__, dmatrix<double>& A__, dmatrix<double>& B__, double* eval__, dmatrix<double>& Z__) override
1307 {
1308 PROFILE("Eigensolver_dlaf|dlaf_symmetric_generalized_eigensolver_d");
1309 return solve_(matrix_size__, A__, B__, eval__, Z__);
1310 }
1311
1312 int solve(ftn_int matrix_size__, dmatrix<float>& A__, dmatrix<float>& B__, float* eval__, dmatrix<float>& Z__) override
1313 {
1314 PROFILE("Eigensolver_dlaf|dlaf_symmetric_generalized_eigensolver_s");
1315 return solve_(matrix_size__, A__, B__, eval__, Z__);
1316 }
1317
1318 /// Solve a generalized eigen-value problem for N lowest eigen-pairs.
1319 template <typename T>
1320 int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix<T>& A__, dmatrix<T>& B__, real_type<T>* eval__, dmatrix<T>& Z__){
1321 auto& mph = get_memory_pool(sddk::memory_t::host);
1322 auto w = mph.get_unique_ptr<real_type<T>>(matrix_size__);
1323
1324 auto info = solve_(matrix_size__, A__, B__, w.get(), Z__);
1325
1326 std::copy(w.get(), w.get() + nev__, eval__);
1327
1328 return info;
1329 }
1330
1331 /// wrapper for solving a generalized eigen-value problem for N lowest eigen-pairs.
1332 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__, dmatrix<std::complex<double>>& B__, double* eval__, dmatrix<std::complex<double>>& Z__) override
1333 {
1334 PROFILE("Eigensolver_dlaf|dlaf_hermitian_generalized_eigensolver_z");
1335 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1336 }
1337
1338 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<float>>& A__, dmatrix<std::complex<float>>& B__, float* eval__, dmatrix<std::complex<float>>& Z__) override
1339 {
1340 PROFILE("Eigensolver_dlaf|dlaf_hermitian_generalized_eigensolver_c");
1341 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1342 }
1343
1344 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__, dmatrix<double>& B__, double* eval__, dmatrix<double>& Z__) override
1345 {
1346 PROFILE("Eigensolver_dlaf|dlaf_symmetric_generalized_eigensolver_d");
1347 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1348 }
1349
1350 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<float>& A__, dmatrix<float>& B__, float* eval__, dmatrix<float>& Z__) override
1351 {
1352 PROFILE("Eigensolver_dlaf|dlaf_symmetric_generalized_eigensolver_s");
1353 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1354 }
1355};
1356#else
1358{
1359 public:
1361 : Eigensolver(ev_solver_t::dlaf, true, sddk::memory_t::host, sddk::memory_t::host)
1362 {
1363 }
1364};
1365#endif
1366
1367#ifdef SIRIUS_MAGMA
1368class Eigensolver_magma: public Eigensolver
1369{
1370 public:
1371
1373 : Eigensolver(ev_solver_t::magma, false, sddk::memory_t::host_pinned, sddk::memory_t::host)
1374 {
1375 }
1376
1377 /// Solve a generalized eigen-value problem for N lowest eigen-pairs.
1378 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__, dmatrix<double>& B__, double* eval__,
1379 dmatrix<double>& Z__) override
1380 {
1381 PROFILE("Eigensolver_magma|dsygvdx");
1382
1383 int nt = omp_get_max_threads();
1384 int lda = A__.ld();
1385 int ldb = B__.ld();
1386
1387 auto& mph = get_memory_pool(sddk::memory_t::host);
1388 auto& mphp = get_memory_pool(sddk::memory_t::host_pinned);
1389 auto w = mph.get_unique_ptr<double>(matrix_size__);
1390
1391 int m;
1392 int info;
1393
1394 int lwork;
1395 int liwork;
1396 magma_dsyevdx_getworksize(matrix_size__, magma_get_parallel_numthreads(), 1, &lwork, &liwork);
1397
1398 auto h_work = mphp.get_unique_ptr<double>(lwork);
1399 auto iwork = mph.get_unique_ptr<magma_int_t>(liwork);
1400
1401 magma_dsygvdx_2stage(1, MagmaVec, MagmaRangeI, MagmaLower, matrix_size__, A__.at(sddk::memory_t::host), lda,
1402 B__.at(sddk::memory_t::host), ldb, 0.0, 0.0, 1, nev__, &m, w.get(), h_work.get(), lwork,
1403 iwork.get(), liwork, &info);
1404
1405
1406 if (nt != omp_get_max_threads()) {
1407 RTE_THROW("magma has changed the number of threads");
1408 }
1409
1410 if (m < nev__) {
1411 return 1;
1412 }
1413
1414 if (!info) {
1415 std::copy(w.get(), w.get() + nev__, eval__);
1416 #pragma omp parallel for schedule(static)
1417 for (int i = 0; i < nev__; i++) {
1418 std::copy(A__.at(sddk::memory_t::host, 0, i), A__.at(sddk::memory_t::host, 0, i) + matrix_size__,
1419 Z__.at(sddk::memory_t::host, 0, i));
1420 }
1421 }
1422
1423 return info;
1424 }
1425
1426 /// Solve a generalized eigen-value problem for N lowest eigen-pairs.
1427 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__, dmatrix<std::complex<double>>& B__,
1428 double* eval__, dmatrix<std::complex<double>>& Z__) override
1429 {
1430 PROFILE("Eigensolver_magma|zhegvdx");
1431
1432 int nt = omp_get_max_threads();
1433 int lda = A__.ld();
1434 int ldb = B__.ld();
1435
1436 auto& mph = get_memory_pool(sddk::memory_t::host);
1437 auto& mphp = get_memory_pool(sddk::memory_t::host_pinned);
1438 auto w = mph.get_unique_ptr<double>(matrix_size__);
1439
1440 int m;
1441 int info;
1442
1443 int lwork;
1444 int lrwork;
1445 int liwork;
1446 magma_zheevdx_getworksize(matrix_size__, magma_get_parallel_numthreads(), 1, &lwork, &lrwork, &liwork);
1447
1448 auto h_work = mphp.get_unique_ptr<std::complex<double>>(lwork);
1449 auto rwork = mphp.get_unique_ptr<double>(lrwork);
1450 auto iwork = mph.get_unique_ptr<magma_int_t>(liwork);
1451
1452 magma_zhegvdx_2stage(1, MagmaVec, MagmaRangeI, MagmaLower, matrix_size__,
1453 reinterpret_cast<magmaDoubleComplex*>(A__.at(sddk::memory_t::host)), lda,
1454 reinterpret_cast<magmaDoubleComplex*>(B__.at(sddk::memory_t::host)), ldb, 0.0, 0.0,
1455 1, nev__, &m, w.get(), reinterpret_cast<magmaDoubleComplex*>(h_work.get()), lwork,
1456 rwork.get(), lrwork, iwork.get(), liwork, &info);
1457
1458 if (nt != omp_get_max_threads()) {
1459 RTE_THROW("magma has changed the number of threads");
1460 }
1461
1462 if (m < nev__) {
1463 return 1;
1464 }
1465
1466 if (!info) {
1467 std::copy(w.get(), w.get() + nev__, eval__);
1468 #pragma omp parallel for schedule(static)
1469 for (int i = 0; i < nev__; i++) {
1470 std::copy(A__.at(sddk::memory_t::host, 0, i), A__.at(sddk::memory_t::host, 0, i) + matrix_size__,
1471 Z__.at(sddk::memory_t::host, 0, i));
1472 }
1473 }
1474
1475 return info;
1476 }
1477
1478 /// Solve a standard eigen-value problem for N lowest eigen-pairs.
1479 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__, double* eval__, dmatrix<double>& Z__) override
1480 {
1481 PROFILE("Eigensolver_magma|dsygvdx");
1482
1483 // Bug in magma for small matrix sizes -> call lapack instead as workaround
1484 if (matrix_size__ <= 128) {
1485 return Eigensolver_lapack().solve(matrix_size__, nev__, A__, eval__, Z__);
1486 }
1487
1488 auto& mph = get_memory_pool(sddk::memory_t::host);
1489 auto& mphp = get_memory_pool(sddk::memory_t::host_pinned);
1490
1491 int nt = omp_get_max_threads();
1492 int lda = A__.ld();
1493 auto w = mph.get_unique_ptr<double>(matrix_size__);
1494
1495 int lwork;
1496 int liwork;
1497 magma_dsyevdx_getworksize(matrix_size__, magma_get_parallel_numthreads(), 1, &lwork, &liwork);
1498
1499 auto h_work = mphp.get_unique_ptr<double>(lwork);
1500 auto iwork = mph.get_unique_ptr<magma_int_t>(liwork);
1501
1502 int info;
1503 int m;
1504
1505 magma_dsyevdx(MagmaVec, MagmaRangeI, MagmaLower, matrix_size__, A__.at(sddk::memory_t::host), lda, 0.0, 0.0, 1,
1506 nev__, &m, w.get(), h_work.get(), lwork, iwork.get(), liwork, &info);
1507
1508 if (nt != omp_get_max_threads()) {
1509 RTE_THROW("magma has changed the number of threads");
1510 }
1511
1512 if (m < nev__) {
1513 return 1;
1514 }
1515
1516 if (!info) {
1517 std::copy(w.get(), w.get() + nev__, eval__);
1518 #pragma omp parallel for schedule(static)
1519 for (int i = 0; i < nev__; i++) {
1520 std::copy(A__.at(sddk::memory_t::host, 0, i), A__.at(sddk::memory_t::host, 0, i) + matrix_size__,
1521 Z__.at(sddk::memory_t::host, 0, i));
1522 }
1523 }
1524
1525 return info;
1526 }
1527
1528 /// Solve a standard eigen-value problem for N lowest eigen-pairs.
1529 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__, double* eval__, dmatrix<std::complex<double>>& Z__) override
1530 {
1531 PROFILE("Eigensolver_magma|zheevdx");
1532
1533 int nt = omp_get_max_threads();
1534 int lda = A__.ld();
1535 auto& mph = get_memory_pool(sddk::memory_t::host);
1536 auto& mphp = get_memory_pool(sddk::memory_t::host_pinned);
1537 auto w = mph.get_unique_ptr<double>(matrix_size__);
1538
1539 int info, m;
1540
1541 int lwork;
1542 int lrwork;
1543 int liwork;
1544 magma_zheevdx_getworksize(matrix_size__, magma_get_parallel_numthreads(), 1, &lwork, &lrwork, &liwork);
1545
1546 auto h_work = mphp.get_unique_ptr<std::complex<double>>(lwork);
1547 auto rwork = mphp.get_unique_ptr<double>(lrwork);
1548 auto iwork = mph.get_unique_ptr<magma_int_t>(liwork);
1549
1550 magma_zheevdx_2stage(MagmaVec, MagmaRangeI, MagmaLower, matrix_size__,
1551 reinterpret_cast<magmaDoubleComplex*>(A__.at(sddk::memory_t::host)), lda, 0.0, 0.0, 1,
1552 nev__, &m, w.get(), reinterpret_cast<magmaDoubleComplex*>(h_work.get()), lwork, rwork.get(),
1553 lrwork, iwork.get(), liwork, &info);
1554
1555 if (nt != omp_get_max_threads()) {
1556 RTE_THROW("magma has changed the number of threads");
1557 }
1558
1559 if (m < nev__) {
1560 return 1;
1561 }
1562
1563 if (!info) {
1564 std::copy(w.get(), w.get() + nev__, eval__);
1565 #pragma omp parallel for schedule(static)
1566 for (int i = 0; i < nev__; i++) {
1567 std::copy(A__.at(sddk::memory_t::host, 0, i), A__.at(sddk::memory_t::host, 0, i) + matrix_size__,
1568 Z__.at(sddk::memory_t::host, 0, i));
1569 }
1570 }
1571
1572 return info;
1573 }
1574};
1575
1576class Eigensolver_magma_gpu: public Eigensolver
1577{
1578 public:
1579 Eigensolver_magma_gpu()
1580 : Eigensolver(ev_solver_t::magma, false, sddk::memory_t::host_pinned, sddk::memory_t::device)
1581 {
1582 }
1583
1584 /// Solve a hermitian eigen-value problem for N lowest eigen-pairs.
1585 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__, double* eval__,
1586 dmatrix<std::complex<double>>& Z__) override
1587 {
1588 PROFILE("Eigensolver_magma_gpu|zheevdx");
1589
1590 int nt = omp_get_max_threads();
1591 int lda = A__.ld();
1592 auto& mph = get_memory_pool(sddk::memory_t::host);
1593 auto& mphp = get_memory_pool(sddk::memory_t::host_pinned);
1594 auto w = mph.get_unique_ptr<double>(matrix_size__);
1595
1596 acc::copyin(A__.at(sddk::memory_t::device), A__.ld(), A__.at(sddk::memory_t::host), A__.ld(), matrix_size__,
1597 matrix_size__);
1598
1599 int info, m;
1600
1601 int lwork;
1602 int lrwork;
1603 int liwork;
1604 magma_zheevdx_getworksize(matrix_size__, magma_get_parallel_numthreads(), 1, &lwork, &lrwork, &liwork);
1605
1606 int llda = matrix_size__ + 32;
1607 auto z_work = mphp.get_unique_ptr<std::complex<double>>(llda * matrix_size__);
1608
1609 auto h_work = mphp.get_unique_ptr<std::complex<double>>(lwork);
1610 auto rwork = mphp.get_unique_ptr<double>(lrwork);
1611 auto iwork = mph.get_unique_ptr<magma_int_t>(liwork);
1612
1613 magma_zheevdx_gpu(MagmaVec, MagmaRangeI, MagmaLower, matrix_size__,
1614 reinterpret_cast<magmaDoubleComplex*>(A__.at(sddk::memory_t::device)), lda, 0.0, 0.0, 1,
1615 nev__, &m, w.get(), reinterpret_cast<magmaDoubleComplex*>(z_work.get()), llda,
1616 reinterpret_cast<magmaDoubleComplex*>(h_work.get()), lwork, rwork.get(), lrwork, iwork.get(),
1617 liwork, &info);
1618
1619 if (nt != omp_get_max_threads()) {
1620 RTE_THROW("magma has changed the number of threads");
1621 }
1622
1623 if (m < nev__) {
1624 return 1;
1625 }
1626
1627 if (!info) {
1628 std::copy(w.get(), w.get() + nev__, eval__);
1629 acc::copyout(Z__.at(sddk::memory_t::host), Z__.ld(), A__.at(sddk::memory_t::device), A__.ld(),
1630 matrix_size__, nev__);
1631 }
1632
1633 return info;
1634 }
1635
1636 /// Solve a symmetric eigen-value problem for N lower eigen-pairs.
1637 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__, double* eval__, dmatrix<double>& Z__) override
1638 {
1639 PROFILE("Eigensolver_magma_gpu|dsyevdx");
1640
1641 int nt = omp_get_max_threads();
1642 int lda = A__.ld();
1643 auto& mph = get_memory_pool(sddk::memory_t::host);
1644 auto& mphp = get_memory_pool(sddk::memory_t::host_pinned);
1645 auto w = mph.get_unique_ptr<double>(matrix_size__);
1646
1647 acc::copyin(A__.at(sddk::memory_t::device), A__.ld(), A__.at(sddk::memory_t::host), A__.ld(), matrix_size__,
1648 matrix_size__);
1649
1650 int info, m;
1651
1652 int lwork;
1653 int liwork;
1654 magma_dsyevdx_getworksize(matrix_size__, magma_get_parallel_numthreads(), 1, &lwork, &liwork);
1655
1656 int llda = matrix_size__ + 32;
1657 auto z_work = mphp.get_unique_ptr<double>(llda * matrix_size__);
1658 auto h_work = mphp.get_unique_ptr<double>(lwork);
1659 auto iwork = mph.get_unique_ptr<magma_int_t>(liwork);
1660
1661 magma_dsyevdx_gpu(MagmaVec /*jobz*/, MagmaRangeI /*range*/, MagmaLower /*uplo*/, matrix_size__ /*n*/,
1662 A__.at(sddk::memory_t::device) /*dA*/, lda /*ldda*/, 0.0 /*vl*/, 0.0 /*vu*/, 1 /*il*/,
1663 nev__ /*iu*/, &m /*mout*/, w.get() /*w*/, z_work.get() /*wA*/, llda /*ldwa*/,
1664 h_work.get() /*work*/, lwork /*lwork*/, iwork.get() /*iwork*/, liwork /*liwork*/,
1665 &info /*info*/);
1666
1667 if (nt != omp_get_max_threads()) {
1668 RTE_THROW("magma has changed the number of threads");
1669 }
1670
1671 if (m < nev__) {
1672 return 1;
1673 }
1674
1675 if (!info) {
1676 std::copy(w.get(), w.get() + nev__, eval__);
1677 acc::copyout(Z__.at(sddk::memory_t::host), Z__.ld(), A__.at(sddk::memory_t::device), A__.ld(),
1678 matrix_size__, nev__);
1679 }
1680
1681 return info;
1682 }
1683
1684};
1685#else
1687{
1688 public:
1690 : Eigensolver(ev_solver_t::magma, false, sddk::memory_t::host_pinned, sddk::memory_t::host)
1691 {
1692 }
1693};
1694
1696{
1697 public:
1699 : Eigensolver(ev_solver_t::magma, false, sddk::memory_t::host_pinned, sddk::memory_t::device)
1700 {
1701 }
1702};
1703#endif
1704
1705#if defined(SIRIUS_CUDA)
1707{
1708 public:
1710 : Eigensolver(ev_solver_t::cusolver, false, sddk::memory_t::host_pinned, sddk::memory_t::device)
1711 {
1712 }
1713
1714 template <typename T>
1715 int solve_(ftn_int matrix_size__, int nev__, dmatrix<T>& A__, real_type<T>* eval__, dmatrix<T>& Z__)
1716 {
1717 cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR;
1718 cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER;
1719 cusolverEigRange_t range = CUSOLVER_EIG_RANGE_I;
1720
1721 auto& mpd = get_memory_pool(sddk::memory_t::device);
1722 auto w = mpd.get_unique_ptr<real_type<T>>(matrix_size__);
1723 acc::copyin(A__.at(sddk::memory_t::device), A__.ld(), A__.at(sddk::memory_t::host), A__.ld(), matrix_size__, matrix_size__);
1724
1725 int lwork;
1726 int h_meig;
1727 auto vl = -std::numeric_limits<real_type<T>>::infinity();
1728 auto vu = std::numeric_limits<real_type<T>>::infinity();
1729
1730 if (std::is_same<T, double>::value) {
1731 CALL_CUSOLVER(cusolverDnDsyevdx_bufferSize, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1732 reinterpret_cast<double*>(A__.at(sddk::memory_t::device)), A__.ld(),
1733 vl, vu, 1, nev__, &h_meig, reinterpret_cast<double*>(w.get()), &lwork));
1734 } else if (std::is_same<T, float>::value) {
1735 CALL_CUSOLVER(cusolverDnSsyevdx_bufferSize, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1736 reinterpret_cast<float*>(A__.at(sddk::memory_t::device)), A__.ld(), vl,
1737 vu, 1, nev__, &h_meig, reinterpret_cast<float*>(w.get()), &lwork));
1738 } else if (std::is_same<T, std::complex<double>>::value) {
1739 CALL_CUSOLVER(cusolverDnZheevdx_bufferSize, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1740 reinterpret_cast<cuDoubleComplex*>(A__.at(sddk::memory_t::device)),
1741 A__.ld(), vl, vu, 1, nev__, &h_meig, reinterpret_cast<double*>(w.get()), &lwork));
1742 } else if (std::is_same<T, std::complex<float>>::value) {
1743 CALL_CUSOLVER(cusolverDnCheevdx_bufferSize, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1744 reinterpret_cast<cuFloatComplex*>(A__.at(sddk::memory_t::device)),
1745 A__.ld(), vl, vu, 1, nev__, &h_meig, reinterpret_cast<float*>(w.get()), &lwork));
1746 }
1747
1748 auto work = mpd.get_unique_ptr<T>(lwork);
1749
1750 int info;
1751 auto dinfo = mpd.get_unique_ptr<int>(1);
1752 if (std::is_same<T, double>::value) {
1753 CALL_CUSOLVER(cusolverDnDsyevdx, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1754 reinterpret_cast<double*>(A__.at(sddk::memory_t::device)), A__.ld(), vl, vu, 1, nev__, &h_meig,
1755 reinterpret_cast<double*>(w.get()), reinterpret_cast<double*>(work.get()), lwork, dinfo.get()));
1756 } else if (std::is_same<T, float>::value) {
1757 CALL_CUSOLVER(cusolverDnSsyevdx, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1758 reinterpret_cast<float*>(A__.at(sddk::memory_t::device)), A__.ld(), vl, vu, 1, nev__, &h_meig,
1759 reinterpret_cast<float*>(w.get()), reinterpret_cast<float*>(work.get()), lwork, dinfo.get()));
1760 } else if (std::is_same<T, std::complex<double>>::value) {
1761 CALL_CUSOLVER(cusolverDnZheevdx, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1762 reinterpret_cast<cuDoubleComplex*>(A__.at(sddk::memory_t::device)), A__.ld(),
1763 vl, vu, 1, nev__, &h_meig, reinterpret_cast<double*>(w.get()),
1764 reinterpret_cast<cuDoubleComplex*>(work.get()), lwork, dinfo.get()));
1765 } else if (std::is_same<T, std::complex<float>>::value) {
1766 CALL_CUSOLVER(cusolverDnCheevdx, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1767 reinterpret_cast<cuFloatComplex*>(A__.at(sddk::memory_t::device)), A__.ld(),
1768 vl, vu, 1, nev__, &h_meig, reinterpret_cast<float*>(w.get()),
1769 reinterpret_cast<cuFloatComplex*>(work.get()), lwork, dinfo.get()));
1770 }
1771
1772 acc::copyout(&info, dinfo.get(), 1);
1773 if (!info) {
1774 acc::copyout(eval__, w.get(), nev__);
1775 acc::copyout(Z__.at(sddk::memory_t::host), Z__.ld(), A__.at(sddk::memory_t::device), A__.ld(), matrix_size__, nev__);
1776 }
1777 return info;
1778 }
1779
1780 /// wrapper for dynamic binding
1781 int solve(ftn_int matrix_size__, int nev__, dmatrix<float>& A__, float* eval__, dmatrix<float>& Z__) override
1782 {
1783 PROFILE("Eigensolver_cuda|dsyevdx");
1784 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1785 }
1786
1787 int solve(ftn_int matrix_size__, int nev__, dmatrix<double>& A__, double* eval__, dmatrix<double>& Z__) override
1788 {
1789 PROFILE("Eigensolver_cuda|ssyevdx");
1790 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1791 }
1792
1793 int solve(ftn_int matrix_size__, int nev__, dmatrix<std::complex<float>>& A__, float* eval__, dmatrix<std::complex<float>>& Z__) override
1794 {
1795 PROFILE("Eigensolver_cuda|cheevdx");
1796 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1797 }
1798
1799 int solve(ftn_int matrix_size__, int nev__, dmatrix<std::complex<double>>& A__, double* eval__, dmatrix<std::complex<double>>& Z__) override
1800 {
1801 PROFILE("Eigensolver_cuda|zheevdx");
1802 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1803 }
1804
1805 template <typename T>
1806 int solve_(ftn_int matrix_size__, int nev__, dmatrix<T>& A__, dmatrix<T>& B__, real_type<T>* eval__, dmatrix<T>& Z__)
1807 {
1808 cusolverEigType_t itype = CUSOLVER_EIG_TYPE_1; // A*x = (lambda)*B*x
1809 cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR;
1810 cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER;
1811 cusolverEigRange_t range = CUSOLVER_EIG_RANGE_I;
1812
1813 auto& mpd = get_memory_pool(sddk::memory_t::device);
1814 auto w = mpd.get_unique_ptr<real_type<T>>(matrix_size__);
1815 acc::copyin(A__.at(sddk::memory_t::device), A__.ld(), A__.at(sddk::memory_t::host), A__.ld(), matrix_size__, matrix_size__);
1816 acc::copyin(B__.at(sddk::memory_t::device), B__.ld(), B__.at(sddk::memory_t::host), B__.ld(), matrix_size__, matrix_size__);
1817
1818 int lwork;
1819 int h_meig;
1820 auto vl = -std::numeric_limits<real_type<T>>::infinity();
1821 auto vu = std::numeric_limits<real_type<T>>::infinity();
1822
1823 if (std::is_same<T, double>::value) {
1824 CALL_CUSOLVER(cusolverDnDsygvdx_bufferSize, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1825 reinterpret_cast<double*>(A__.at(sddk::memory_t::device)), A__.ld(),
1826 reinterpret_cast<double*>(B__.at(sddk::memory_t::device)), B__.ld(),
1827 vl, vu, 1, nev__, &h_meig, reinterpret_cast<double*>(w.get()), &lwork));
1828 } else if (std::is_same<T, float>::value) {
1829 CALL_CUSOLVER(cusolverDnSsygvdx_bufferSize, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1830 reinterpret_cast<float*>(A__.at(sddk::memory_t::device)), A__.ld(),
1831 reinterpret_cast<float*>(B__.at(sddk::memory_t::device)), B__.ld(),
1832 vl, vu, 1, nev__, &h_meig, reinterpret_cast<float*>(w.get()), &lwork));
1833 } else if (std::is_same<T, std::complex<double>>::value) {
1834 CALL_CUSOLVER(cusolverDnZhegvdx_bufferSize, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1835 reinterpret_cast<cuDoubleComplex*>(A__.at(sddk::memory_t::device)), A__.ld(),
1836 reinterpret_cast<cuDoubleComplex*>(B__.at(sddk::memory_t::device)), B__.ld(),
1837 vl, vu, 1, nev__, &h_meig, reinterpret_cast<double*>(w.get()), &lwork));
1838 } else if (std::is_same<T, std::complex<float>>::value) {
1839 CALL_CUSOLVER(cusolverDnChegvdx_bufferSize, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1840 reinterpret_cast<cuFloatComplex*>(A__.at(sddk::memory_t::device)), A__.ld(),
1841 reinterpret_cast<cuFloatComplex*>(B__.at(sddk::memory_t::device)), B__.ld(),
1842 vl, vu, 1, nev__, &h_meig, reinterpret_cast<float*>(w.get()), &lwork));
1843 }
1844
1845 auto work = mpd.get_unique_ptr<T>(lwork);
1846
1847 int info;
1848 auto dinfo = mpd.get_unique_ptr<int>(1);
1849 if (std::is_same<T, double>::value) {
1850 CALL_CUSOLVER(cusolverDnDsygvdx, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1851 reinterpret_cast<double*>(A__.at(sddk::memory_t::device)), A__.ld(),
1852 reinterpret_cast<double*>(B__.at(sddk::memory_t::device)), B__.ld(),
1853 vl, vu, 1, nev__, &h_meig, reinterpret_cast<double*>(w.get()),
1854 reinterpret_cast<double*>(work.get()), lwork, dinfo.get()));
1855 } else if (std::is_same<T, float>::value) {
1856 CALL_CUSOLVER(cusolverDnSsygvdx, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1857 reinterpret_cast<float*>(A__.at(sddk::memory_t::device)), A__.ld(),
1858 reinterpret_cast<float*>(B__.at(sddk::memory_t::device)), B__.ld(),
1859 vl, vu, 1, nev__, &h_meig, reinterpret_cast<float*>(w.get()),
1860 reinterpret_cast<float*>(work.get()), lwork, dinfo.get()));
1861 } else if (std::is_same<T, std::complex<double>>::value) {
1862 CALL_CUSOLVER(cusolverDnZhegvdx, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1863 reinterpret_cast<cuDoubleComplex*>(A__.at(sddk::memory_t::device)), A__.ld(),
1864 reinterpret_cast<cuDoubleComplex*>(B__.at(sddk::memory_t::device)), B__.ld(),
1865 vl, vu, 1, nev__, &h_meig, reinterpret_cast<double*>(w.get()),
1866 reinterpret_cast<cuDoubleComplex*>(work.get()), lwork, dinfo.get()));
1867 } else if (std::is_same<T, std::complex<float>>::value) {
1868 CALL_CUSOLVER(cusolverDnChegvdx, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1869 reinterpret_cast<cuFloatComplex*>(A__.at(sddk::memory_t::device)), A__.ld(),
1870 reinterpret_cast<cuFloatComplex*>(B__.at(sddk::memory_t::device)), B__.ld(),
1871 vl, vu, 1, nev__, &h_meig, reinterpret_cast<float*>(w.get()),
1872 reinterpret_cast<cuFloatComplex*>(work.get()), lwork, dinfo.get()));
1873 }
1874
1875 acc::copyout(&info, dinfo.get(), 1);
1876 if (!info) {
1877 acc::copyout(eval__, w.get(), nev__);
1878 acc::copyout(Z__.at(sddk::memory_t::host), Z__.ld(), A__.at(sddk::memory_t::device), A__.ld(), matrix_size__, nev__);
1879 }
1880 return info;
1881 }
1882
1883 /// wrapper for dynamic binding
1884 int solve(ftn_int matrix_size__, int nev__, dmatrix<double>& A__, dmatrix<double>& B__, double* eval__, dmatrix<double>& Z__) override
1885 {
1886 PROFILE("Eigensolver_cuda|dsygvdx");
1887 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1888 }
1889
1890 int solve(ftn_int matrix_size__, int nev__, dmatrix<float>& A__, dmatrix<float>& B__, float* eval__, dmatrix<float>& Z__) override
1891 {
1892 PROFILE("Eigensolver_cuda|ssygvdx");
1893 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1894 }
1895
1896 int solve(ftn_int matrix_size__, int nev__, dmatrix<std::complex<double>>& A__, dmatrix<std::complex<double>>& B__,
1897 double* eval__, dmatrix<std::complex<double>>& Z__) override
1898 {
1899 PROFILE("Eigensolver_cuda|zhegvdx");
1900 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1901 }
1902
1903 int solve(ftn_int matrix_size__, int nev__, dmatrix<std::complex<float>>& A__, dmatrix<std::complex<float>>& B__,
1904 float* eval__, dmatrix<std::complex<float>>& Z__) override
1905 {
1906 PROFILE("Eigensolver_cuda|chegvdx");
1907 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1908 }
1909
1910 /// Solve a standard eigen-value problem for all eigen-pairs.
1911 int solve(ftn_int matrix_size__, dmatrix<double>& A__, dmatrix<double>& B__, double* eval__, dmatrix<double>& Z__) override
1912 {
1913 return solve(matrix_size__, matrix_size__, A__, B__, eval__, Z__);
1914 }
1915
1916 int solve(ftn_int matrix_size__, dmatrix<float>& A__, dmatrix<float>& B__, float* eval__, dmatrix<float>& Z__) override
1917 {
1918 return solve(matrix_size__, matrix_size__, A__, B__, eval__, Z__);
1919 }
1920
1921 int solve(ftn_int matrix_size__, dmatrix<std::complex<double>>& A__, dmatrix<std::complex<double>>& B__, double* eval__,
1922 dmatrix<std::complex<double>>& Z__) override
1923 {
1924 return solve(matrix_size__, matrix_size__, A__, B__, eval__, Z__);
1925 }
1926
1927 int solve(ftn_int matrix_size__, dmatrix<std::complex<float>>& A__, dmatrix<std::complex<float>>& B__, float* eval__,
1928 dmatrix<std::complex<float>>& Z__) override
1929 {
1930 return solve(matrix_size__, matrix_size__, A__, B__, eval__, Z__);
1931 }
1932};
1933#else
1934class Eigensolver_cuda: public Eigensolver
1935{
1936 public:
1937 Eigensolver_cuda()
1938 : Eigensolver(ev_solver_t::cusolver, false, sddk::memory_t::host_pinned, sddk::memory_t::device)
1939 {
1940 }
1941};
1942#endif
1943
1944} // namespace
1945
1946} // namespace sirius
1947
1948#endif // __EIGENPROBLEM_HPP__
int solve(ftn_int matrix_size__, int nev__, dmatrix< float > &A__, float *eval__, dmatrix< float > &Z__) override
wrapper for dynamic binding
int solve(ftn_int matrix_size__, dmatrix< std::complex< float > > &A__, dmatrix< std::complex< float > > &B__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a generalized eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, int nev__, dmatrix< double > &A__, dmatrix< double > &B__, double *eval__, dmatrix< double > &Z__) override
wrapper for dynamic binding
int solve(ftn_int matrix_size__, dmatrix< std::complex< double > > &A__, dmatrix< std::complex< double > > &B__, double *eval__, dmatrix< std::complex< double > > &Z__) override
Solve a generalized eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< float > &A__, dmatrix< float > &B__, float *eval__, dmatrix< float > &Z__) override
Solve a generalized eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< double > &A__, dmatrix< double > &B__, double *eval__, dmatrix< double > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< double > &A__, dmatrix< double > &B__, double *eval__, dmatrix< double > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< float > > &A__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< double > > &A__, dmatrix< std::complex< double > > &B__, double *eval__, dmatrix< std::complex< double > > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix< T > &A__, real_type< T > *eval__, dmatrix< T > &Z__)
Solve a standard eigen-value problem for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix< T > &A__, dmatrix< T > &B__, real_type< T > *eval__, dmatrix< T > &Z__)
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< float > &A__, dmatrix< float > &B__, float *eval__, dmatrix< float > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< double > &A__, double *eval__, dmatrix< double > &Z__) override
wrapper for solving a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< float > &A__, float *eval__, dmatrix< float > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< std::complex< float > > &A__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< double > &A__, dmatrix< double > &B__, double *eval__, dmatrix< double > &Z__) override
wrapper for solving a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< float > &A__, float *eval__, dmatrix< float > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< double > &A__, double *eval__, dmatrix< double > &Z__) override
wrapper for solving a standard eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< std::complex< double > > &A__, double *eval__, dmatrix< std::complex< double > > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< float > > &A__, dmatrix< std::complex< float > > &B__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< double > > &A__, double *eval__, dmatrix< std::complex< double > > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, dmatrix< T > &A__, real_type< T > *eval__, dmatrix< T > &Z__)
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< float > > &A__, dmatrix< std::complex< float > > &B__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix< T > &A__, real_type< T > *eval__, dmatrix< T > &Z__)
Solve a standard eigen-value problem for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, dmatrix< T > &A__, real_type< T > *eval__, dmatrix< T > &Z__)
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< double > &A__, dmatrix< double > &B__, double *eval__, dmatrix< double > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix< T > &A__, dmatrix< T > &B__, real_type< T > *eval__, dmatrix< T > &Z__)
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< double > > &A__, dmatrix< std::complex< double > > &B__, double *eval__, dmatrix< std::complex< double > > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< double > &A__, double *eval__, dmatrix< double > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< std::complex< float > > &A__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< float > &A__, float *eval__, dmatrix< float > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< double > &A__, double *eval__, dmatrix< double > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix< T > &A__, dmatrix< T > &B__, T *eval__, dmatrix< T > &Z__)
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< double > > &A__, double *eval__, dmatrix< std::complex< double > > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< float > &A__, dmatrix< float > &B__, float *eval__, dmatrix< float > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix< T > &A__, T *eval__, dmatrix< T > &Z__)
Solve a standard eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< float > > &A__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< std::complex< double > > &A__, double *eval__, dmatrix< std::complex< double > > &Z__) override
wrapper for solving a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< float > &A__, float *eval__, dmatrix< float > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
Interface to different eigen-solvers.
Definition: eigensolver.hpp:81
virtual int solve(ftn_int matrix_size__, dmatrix< double > &A__, double *eval__, dmatrix< double > &Z__)
Solve a standard eigen-value problem for all eigen-pairs.
Eigensolver(ev_solver_t type__, bool is_parallel__, sddk::memory_t host_memory_t__, sddk::memory_t data_memory_t__)
Constructor.
Definition: eigensolver.hpp:99
Distributed matrix.
Definition: dmatrix.hpp:56
int bs_row() const
Row blocking factor.
Definition: dmatrix.hpp:301
int bs_col() const
Column blocking factor.
Definition: dmatrix.hpp:307
uint32_t ld() const
Return leading dimension size.
Definition: memory.hpp:1233
Interface to CUDA eigen-solver library.
Contains definition of eigensolver factory.
Linear algebra interface.
Interface to some of the MAGMA functions.
memory_t
Memory types where the code can store data.
Definition: memory.hpp:71
@ host_pinned
Pinned host memory. This is host memory + extra bit flag.
void copyout(T *target__, T const *source__, size_t n__)
Copy memory from device to host.
Definition: acc.hpp:367
void copyin(T *target__, T const *source__, size_t n__)
Copy memory from host to device.
Definition: acc.hpp:337
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
Definition: acc.hpp:320
ev_solver_t
Type of eigen-value solver.
Definition: eigensolver.hpp:37
@ dlaf
DLA-Future solver.
@ cusolver
CUDA eigen-solver.
@ magma
MAGMA with CPU pointers.
Namespace of the SIRIUS library.
Definition: sirius.f90:5
void initialize(bool call_mpi_init__=true)
Initialize the library.
Definition: sirius.hpp:82
void finalize(bool call_mpi_fin__=true, bool reset_device__=true, bool fftw_cleanup__=true)
Shut down the library.
Definition: sirius.hpp:143
Add or substitute OMP functions.
A time-based profiler.
Eror and warning handling during run-time execution.