25#ifndef __EIGENPROBLEM_HPP__
26#define __EIGENPROBLEM_HPP__
33#if defined(SIRIUS_GPU) && defined(SIRIUS_MAGMA)
37#if defined(SIRIUS_GPU) && defined(SIRIUS_CUDA)
41#if defined(SIRIUS_DLAF)
42#include <dlaf_c/eigensolver/eigensolver.h>
43#include <dlaf_c/eigensolver/gen_eigensolver.h>
60 PROFILE(
"Eigensolver_lapack|dsyevd");
61 return solve_(matrix_size__, A__, eval__, Z__);
64 int solve(ftn_int matrix_size__,
dmatrix<std::complex<double>>& A__,
double* eval__,
dmatrix<std::complex<double>>& Z__)
override {
65 PROFILE(
"Eigensolver_lapack|zheevd");
66 return solve_(matrix_size__, A__, eval__, Z__);
70 PROFILE(
"Eigensolver_lapack|ssyevd");
71 return solve_(matrix_size__, A__, eval__, Z__);
74 int solve(ftn_int matrix_size__,
dmatrix<std::complex<float>>& A__,
float* eval__,
dmatrix<std::complex<float>>& Z__)
override {
75 PROFILE(
"Eigensolver_lapack|cheevd");
76 return solve_(matrix_size__, A__, eval__, Z__);
84 ftn_int lda = A__.
ld();
87 ftn_int liwork = 3 + 5 * matrix_size__;
88 ftn_int lrwork = 1 + 5 * matrix_size__ + 2 * matrix_size__ * matrix_size__;
90 if (std::is_scalar<T>::value) {
91 lwork = 1 + 6 * matrix_size__ + 2 * matrix_size__ * matrix_size__;
93 lwork = 2 * matrix_size__ + matrix_size__ * matrix_size__;
96 auto& mph = get_memory_pool(sddk::memory_t::host);
98 auto work = mph.get_unique_ptr<T>(lwork);
99 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
100 auto rwork = mph.get_unique_ptr<real_type<T>>(lrwork);
102 if (std::is_same<T, double>::value) {
104 (
"V",
"U", &matrix_size__,
reinterpret_cast<double*
>(A__.at(sddk::memory_t::host)), &lda,
105 reinterpret_cast<double*
>(eval__),
reinterpret_cast<double*
>(work.get()), &lwork, iwork.get(), &liwork,
106 &info, (ftn_int)1, (ftn_int)1);
107 }
else if (std::is_same<T, std::complex<double>>::value) {
109 (
"V",
"U", &matrix_size__,
reinterpret_cast<std::complex<double>*
>(A__.at(sddk::memory_t::host)), &lda,
110 reinterpret_cast<double*
>(eval__),
reinterpret_cast<std::complex<double>*
>(work.get()), &lwork,
111 reinterpret_cast<double*
>(rwork.get()), &lrwork, iwork.get(), &liwork, &info, (ftn_int)1, (ftn_int)1);
112 }
else if (std::is_same<T, float>::value) {
114 (
"V",
"U", &matrix_size__,
reinterpret_cast<float*
>(A__.at(sddk::memory_t::host)), &lda,
115 reinterpret_cast<float*
>(eval__),
reinterpret_cast<float*
>(work.get()), &lwork, iwork.get(), &liwork,
116 &info, (ftn_int)1, (ftn_int)1);
117 }
else if (std::is_same<T, std::complex<float>>::value) {
119 (
"V",
"U", &matrix_size__,
reinterpret_cast<std::complex<float>*
>(A__.at(sddk::memory_t::host)), &lda,
120 reinterpret_cast<float*
>(eval__),
reinterpret_cast<std::complex<float>*
>(work.get()), &lwork,
121 reinterpret_cast<float*
>(rwork.get()), &lrwork, iwork.get(), &liwork, &info, (ftn_int)1, (ftn_int)1);
125 for (
int i = 0; i < matrix_size__; i++) {
126 std::copy(A__.at(sddk::memory_t::host, 0, i), A__.at(sddk::memory_t::host, 0, i) + matrix_size__,
127 Z__.at(sddk::memory_t::host, 0, i));
135 PROFILE(
"Eigensolver_lapack|dsyevr");
136 return solve_(matrix_size__, nev__, A__, eval__, Z__);
139 int solve(ftn_int matrix_size__, ftn_int nev__,
dmatrix<std::complex<double>>& A__,
double* eval__,
dmatrix<std::complex<double>>& Z__)
override {
140 PROFILE(
"Eigensolver_lapack|zheevx");
141 return solve_(matrix_size__, nev__, A__, eval__, Z__);
145 PROFILE(
"Eigensolver_lapack|ssyevr");
146 return solve_(matrix_size__, nev__, A__, eval__, Z__);
149 int solve(ftn_int matrix_size__, ftn_int nev__,
dmatrix<std::complex<float>>& A__,
float* eval__,
dmatrix<std::complex<float>>& Z__)
override {
150 PROFILE(
"Eigensolver_lapack|cheevx");
151 return solve_(matrix_size__, nev__, A__, eval__, Z__);
155 template <
typename T>
164 auto& mph = get_memory_pool(sddk::memory_t::host);
166 auto w = mph.get_unique_ptr<real_type<T>>(matrix_size__);
167 auto isuppz = mph.get_unique_ptr<ftn_int>(2 * matrix_size__);
168 auto ifail = mph.get_unique_ptr<ftn_int>(matrix_size__);
170 ftn_int lda = A__.
ld();
171 ftn_int ldz = Z__.
ld();
173 real_type<T> abs_tol = 2 * linalg_base::dlamch(
'S');
178 ftn_int lrwork = 7 * matrix_size__;
180 liwork = 10 * matrix_size__;
181 if (std::is_same<T, double>::value) {
182 nb = std::max(linalg_base::ilaenv(1,
"DSYTRD",
"U", matrix_size__, -1, -1, -1),
183 linalg_base::ilaenv(1,
"DORMTR",
"U", matrix_size__, -1, -1, -1));
184 liwork = 10 * matrix_size__;
185 lwork = std::max((nb + 6) * matrix_size__, 26 * matrix_size__);
186 }
else if (std::is_same<T, std::complex<double>>::value) {
187 nb = linalg_base::ilaenv(1,
"ZHETRD",
"U", matrix_size__, -1, -1, -1);
188 liwork = 5 * matrix_size__;
189 lwork = (nb + 1) * matrix_size__;
190 }
else if (std::is_same<T, float>::value) {
191 nb = std::max(linalg_base::ilaenv(1,
"SSYTRD",
"U", matrix_size__, -1, -1, -1),
192 linalg_base::ilaenv(1,
"SORMTR",
"U", matrix_size__, -1, -1, -1));
193 liwork = 10 * matrix_size__;
194 lwork = std::max((nb + 6) * matrix_size__, 26 * matrix_size__);
195 }
else if (std::is_same<T, std::complex<float>>::value) {
196 nb = linalg_base::ilaenv(1,
"CHETRD",
"U", matrix_size__, -1, -1, -1);
197 liwork = 5 * matrix_size__;
198 lwork = (nb + 1) * matrix_size__;
201 auto work = mph.get_unique_ptr<T>(lwork);
202 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
203 auto rwork = mph.get_unique_ptr<real_type<T>>(lrwork);
205 if (std::is_same<T, double>::value) {
207 (
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<double*
>(A__.at(sddk::memory_t::host)), &lda,
208 reinterpret_cast<double*
>(&vl),
reinterpret_cast<double*
>(&vu), &il, &nev__,
209 reinterpret_cast<double*
>(&abs_tol), &m,
reinterpret_cast<double*
>(w.get()),
210 reinterpret_cast<double*
>(Z__.at(sddk::memory_t::host)), &ldz, isuppz.get(),
211 reinterpret_cast<double*
>(work.get()), &lwork, iwork.get(), &liwork, &info, (ftn_int)1, (ftn_int)1,
213 lwork = std::max((nb + 6) * matrix_size__, 26 * matrix_size__);
214 }
else if (std::is_same<T, std::complex<double>>::value) {
216 (
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<std::complex<double>*
>(A__.at(sddk::memory_t::host)), &lda,
217 reinterpret_cast<double*
>(&vl),
reinterpret_cast<double*
>(&vu), &il, &nev__,
218 reinterpret_cast<double*
>(&abs_tol), &m,
reinterpret_cast<double*
>(w.get()),
219 reinterpret_cast<std::complex<double>*
>(Z__.at(sddk::memory_t::host)), &ldz,
220 reinterpret_cast<std::complex<double>*
>(work.get()), &lwork,
reinterpret_cast<double*
>(rwork.get()),
221 iwork.get(), ifail.get(), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
222 }
else if (std::is_same<T, float>::value) {
224 (
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<float*
>(A__.at(sddk::memory_t::host)), &lda,
225 reinterpret_cast<float*
>(&vl),
reinterpret_cast<float*
>(&vu), &il, &nev__,
226 reinterpret_cast<float*
>(&abs_tol), &m,
reinterpret_cast<float*
>(w.get()),
227 reinterpret_cast<float*
>(Z__.at(sddk::memory_t::host)), &ldz, isuppz.get(),
reinterpret_cast<float*
>(work.get()),
228 &lwork, iwork.get(), &liwork, &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
229 }
else if (std::is_same<T, std::complex<float>>::value) {
231 (
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<std::complex<float>*
>(A__.at(sddk::memory_t::host)), &lda,
232 reinterpret_cast<float*
>(&vl),
reinterpret_cast<float*
>(&vu), &il, &nev__,
233 reinterpret_cast<float*
>(&abs_tol), &m,
reinterpret_cast<float*
>(w.get()),
234 reinterpret_cast<std::complex<float>*
>(Z__.at(sddk::memory_t::host)), &ldz,
235 reinterpret_cast<std::complex<float>*
>(work.get()), &lwork,
reinterpret_cast<float*
>(rwork.get()),
236 iwork.get(), ifail.get(), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
241 s <<
"not all eigen-values are found" << std::endl
242 <<
"target number of eigen-values: " << nev__ << std::endl
243 <<
"number of eigen-values found: " << m << std::endl
244 <<
"matrix_size : " << matrix_size__ << std::endl
245 <<
"lda : " << lda << std::endl
246 <<
"lda : " << lda << std::endl
247 <<
"nb : " << nb << std::endl
248 <<
"liwork : " << liwork << std::endl
249 <<
"lwork : " << lwork << std::endl;
255 std::copy(w.get(), w.get() + nev__, eval__);
264 PROFILE(
"Eigensolver_lapack|dsygvx");
265 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
268 int solve(ftn_int matrix_size__, ftn_int nev__,
dmatrix<std::complex<double>>& A__,
dmatrix<std::complex<double>>& B__,
double* eval__,
269 dmatrix<std::complex<double>>& Z__)
override {
270 PROFILE(
"Eigensolver_lapack|zhegvx");
271 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
276 PROFILE(
"Eigensolver_lapack|ssygvx");
277 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
280 int solve(ftn_int matrix_size__, ftn_int nev__,
dmatrix<std::complex<float>>& A__,
dmatrix<std::complex<float>>& B__,
float* eval__,
281 dmatrix<std::complex<float>>& Z__)
override {
282 PROFILE(
"Eigensolver_lapack|chegvx");
283 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
287 template <
typename T>
293 ftn_int lda = A__.
ld();
294 ftn_int ldb = B__.
ld();
295 ftn_int ldz = Z__.
ld();
297 real_type<T> abs_tol = 2 * linalg_base::dlamch(
'S');
304 auto& mph = get_memory_pool(sddk::memory_t::host);
306 auto w = mph.get_unique_ptr<real_type<T>>(matrix_size__);
307 auto ifail = mph.get_unique_ptr<ftn_int>(matrix_size__);
309 int nb, lwork, liwork;
311 if (std::is_same<T, double>::value) {
312 nb = linalg_base::ilaenv(1,
"DSYTRD",
"U", matrix_size__, 0, 0, 0);
313 lwork = (nb + 3) * matrix_size__ + 1024;
314 liwork = 5 * matrix_size__;
315 }
else if (std::is_same<T, std::complex<double>>::value) {
316 nb = linalg_base::ilaenv(1,
"ZHETRD",
"U", matrix_size__, 0, 0, 0);
317 lwork = (nb + 1) * matrix_size__;
318 lrwork = 7 * matrix_size__;
319 liwork = 5 * matrix_size__;
320 }
else if (std::is_same<T, float>::value) {
321 nb = linalg_base::ilaenv(1,
"SSYTRD",
"U", matrix_size__, 0, 0, 0);
322 lwork = (nb + 3) * matrix_size__ + 1024;
323 liwork = 5 * matrix_size__;
324 }
else if (std::is_same<T, std::complex<float>>::value) {
325 nb = linalg_base::ilaenv(1,
"CHETRD",
"U", matrix_size__, 0, 0, 0);
326 lwork = (nb + 1) * matrix_size__;
327 lrwork = 7 * matrix_size__;
328 liwork = 5 * matrix_size__;
331 auto work = mph.get_unique_ptr<T>(lwork);
332 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
333 auto rwork = mph.get_unique_ptr<real_type<T>>(lrwork);
335 if (std::is_same<T, double>::value) {
337 (&ione,
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<double*
>(A__.at(sddk::memory_t::host)), &lda,
338 reinterpret_cast<double*
>(B__.at(sddk::memory_t::host)), &ldb,
reinterpret_cast<double*
>(&vl),
339 reinterpret_cast<double*
>(&vu), &ione, &nev__,
reinterpret_cast<double*
>(&abs_tol), &m,
340 reinterpret_cast<double*
>(w.get()),
reinterpret_cast<double*
>(Z__.at(sddk::memory_t::host)), &ldz,
341 reinterpret_cast<double*
>(work.get()), &lwork, iwork.get(), ifail.get(), &info, (ftn_int)1, (ftn_int)1,
343 }
else if (std::is_same<T, std::complex<double>>::value) {
345 (&ione,
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<std::complex<double>*
>(A__.at(sddk::memory_t::host)),
346 &lda,
reinterpret_cast<std::complex<double>*
>(B__.at(sddk::memory_t::host)), &ldb,
347 reinterpret_cast<double*
>(&vl),
reinterpret_cast<double*
>(&vu), &ione, &nev__,
348 reinterpret_cast<double*
>(&abs_tol), &m,
reinterpret_cast<double*
>(w.get()),
349 reinterpret_cast<std::complex<double>*
>(Z__.at(sddk::memory_t::host)), &ldz,
350 reinterpret_cast<std::complex<double>*
>(work.get()), &lwork,
reinterpret_cast<double*
>(rwork.get()),
351 iwork.get(), ifail.get(), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
352 }
else if (std::is_same<T, float>::value) {
354 (&ione,
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<float*
>(A__.at(sddk::memory_t::host)), &lda,
355 reinterpret_cast<float*
>(B__.at(sddk::memory_t::host)), &ldb,
reinterpret_cast<float*
>(&vl),
356 reinterpret_cast<float*
>(&vu), &ione, &nev__,
reinterpret_cast<float*
>(&abs_tol), &m,
357 reinterpret_cast<float*
>(w.get()),
reinterpret_cast<float*
>(Z__.at(sddk::memory_t::host)), &ldz,
358 reinterpret_cast<float*
>(work.get()), &lwork, iwork.get(), ifail.get(), &info, (ftn_int)1, (ftn_int)1,
360 }
else if (std::is_same<T, std::complex<float>>::value) {
362 (&ione,
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<std::complex<float>*
>(A__.at(sddk::memory_t::host)), &lda,
363 reinterpret_cast<std::complex<float>*
>(B__.at(sddk::memory_t::host)), &ldb,
reinterpret_cast<float*
>(&vl),
364 reinterpret_cast<float*
>(&vu), &ione, &nev__,
reinterpret_cast<float*
>(&abs_tol), &m,
365 reinterpret_cast<float*
>(w.get()),
reinterpret_cast<std::complex<float>*
>(Z__.at(sddk::memory_t::host)), &ldz,
366 reinterpret_cast<std::complex<float>*
>(work.get()), &lwork,
reinterpret_cast<float*
>(rwork.get()),
367 iwork.get(), ifail.get(), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
372 s <<
"not all eigen-values are found" << std::endl
373 <<
"target number of eigen-values: " << nev__ << std::endl
374 <<
"number of eigen-values found: " << m;
380 std::copy(w.get(), w.get() + nev__, eval__);
451 static void initialize();
453 static void finalize();
459 int solve(ftn_int matrix_size__, ftn_int nev__,
dmatrix<std::complex<double>>& A__,
dmatrix<std::complex<double>>& B__,
460 double* eval__,
dmatrix<std::complex<double>>& Z__)
override;
466 int solve(ftn_int matrix_size__,
dmatrix<std::complex<double>>& A__,
dmatrix<std::complex<double>>& B__,
double* eval__,
467 dmatrix<std::complex<double>>& Z__)
override;
473 int solve(ftn_int matrix_size__, ftn_int nev__,
dmatrix<std::complex<double>>& A__,
double* eval__,
474 dmatrix<std::complex<double>>& Z__)
override;
479 int solve(ftn_int matrix_size__,
dmatrix<std::complex<double>>& A__,
double* eval__,
dmatrix<std::complex<double>>& Z__)
override;
492#ifdef SIRIUS_SCALAPACK
496 double const ortfac_{1e-6};
497 double const abstol_{1e-12};
506 template <typename T, typename = std::enable_if_t<!std::is_scalar<T>::value>>
510 linalg_base::descinit(desca, matrix_size__, matrix_size__, A__.
bs_row(), A__.
bs_col(), 0, 0,
511 A__.blacs_grid().context(), A__.
ld());
514 linalg_base::descinit(descz, matrix_size__, matrix_size__, Z__.
bs_row(), Z__.
bs_col(), 0, 0,
515 Z__.blacs_grid().context(), Z__.
ld());
528 if (std::is_same<T, std::complex<double>>::value) {
530 (
"V",
"U", &matrix_size__,
reinterpret_cast<std::complex<double>*
>(A__.at(sddk::memory_t::host)), &ione, &ione,
531 desca,
reinterpret_cast<double*
>(eval__),
reinterpret_cast<std::complex<double>*
>(Z__.at(sddk::memory_t::host)),
532 &ione, &ione, descz,
reinterpret_cast<std::complex<double>*
>(&work1), &lwork,
reinterpret_cast<double*
>(&rwork1),
533 &lrwork, &iwork1, &liwork, &info, (ftn_int)1, (ftn_int)1);
534 }
else if (std::is_same<T, std::complex<float>>::value) {
536 (
"V",
"U", &matrix_size__,
reinterpret_cast<std::complex<float>*
>(A__.at(sddk::memory_t::host)), &ione, &ione,
537 desca,
reinterpret_cast<float*
>(eval__),
reinterpret_cast<std::complex<float>*
>(Z__.at(sddk::memory_t::host)),
538 &ione, &ione, descz,
reinterpret_cast<std::complex<float>*
>(&work1), &lwork,
reinterpret_cast<float*
>(&rwork1),
539 &lrwork, &iwork1, &liwork, &info, (ftn_int)1, (ftn_int)1);
542 lwork =
static_cast<ftn_int
>(work1.real()) + 1;
543 lrwork =
static_cast<ftn_int
>(rwork1) + 1;
546 auto& mph = get_memory_pool(sddk::memory_t::host);
547 auto work = mph.get_unique_ptr<T>(lwork);
548 auto rwork = mph.get_unique_ptr<real_type<T>>(lrwork);
549 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
551 if (std::is_same<T, std::complex<double>>::value) {
553 (
"V",
"U", &matrix_size__,
reinterpret_cast<std::complex<double>*
>(A__.at(sddk::memory_t::host)), &ione, &ione,
554 desca,
reinterpret_cast<double*
>(eval__),
reinterpret_cast<std::complex<double>*
>(Z__.at(sddk::memory_t::host)),
555 &ione, &ione, descz,
reinterpret_cast<std::complex<double>*
>(work.get()), &lwork,
reinterpret_cast<double*
>(rwork.get()),
556 &lrwork, iwork.get(), &liwork, &info, (ftn_int)1, (ftn_int)1);
557 }
else if (std::is_same<T, std::complex<float>>::value) {
559 (
"V",
"U", &matrix_size__,
reinterpret_cast<std::complex<float>*
>(A__.at(sddk::memory_t::host)), &ione, &ione,
560 desca,
reinterpret_cast<float*
>(eval__),
reinterpret_cast<std::complex<float>*
>(Z__.at(sddk::memory_t::host)),
561 &ione, &ione, descz,
reinterpret_cast<std::complex<float>*
>(work.get()), &lwork,
reinterpret_cast<float*
>(rwork.get()),
562 &lrwork, iwork.get(), &liwork, &info, (ftn_int)1, (ftn_int)1);
568 int solve(ftn_int matrix_size__,
dmatrix<std::complex<double>>& A__,
double* eval__,
dmatrix<std::complex<double>>& Z__)
override
570 PROFILE(
"Eigensolver_scalapack|pzheevd");
571 return solve_(matrix_size__, A__, eval__, Z__);
574 int solve(ftn_int matrix_size__,
dmatrix<std::complex<float>>& A__,
float* eval__,
dmatrix<std::complex<float>>& Z__)
override
576 PROFILE(
"Eigensolver_scalapack|pcheevd");
577 return solve_(matrix_size__, A__, eval__, Z__);
580 template <typename T, typename = std::enable_if_t<std::is_scalar<T>::value>>
592 if (std::is_same<T, double>::value) {
594 (
"V",
"U", &matrix_size__,
reinterpret_cast<double*
>(A__.at(sddk::memory_t::host)), &ione, &ione,
595 const_cast<ftn_int*
>(A__.descriptor()),
reinterpret_cast<double*
>(eval__),
596 reinterpret_cast<double*
>(Z__.at(sddk::memory_t::host)), &ione, &ione,
597 const_cast<ftn_int*
>(Z__.descriptor()),
reinterpret_cast<double*
>(work1), &lwork, iwork1, &liwork, &info,
598 (ftn_int)1, (ftn_int)1);
599 }
else if (std::is_same<T, float>::value) {
601 (
"V",
"U", &matrix_size__,
reinterpret_cast<float*
>(A__.at(sddk::memory_t::host)), &ione, &ione,
602 const_cast<ftn_int*
>(A__.descriptor()),
reinterpret_cast<float*
>(eval__),
603 reinterpret_cast<float*
>(Z__.at(sddk::memory_t::host)), &ione, &ione,
604 const_cast<ftn_int*
>(Z__.descriptor()),
reinterpret_cast<float*
>(work1), &lwork, iwork1, &liwork, &info,
605 (ftn_int)1, (ftn_int)1);
608 lwork =
static_cast<ftn_int
>(work1[0]) + 1;
611 auto& mph = get_memory_pool(sddk::memory_t::host);
612 auto work = mph.get_unique_ptr<T>(lwork);
613 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
615 if (std::is_same<T, double>::value) {
617 (
"V",
"U", &matrix_size__,
reinterpret_cast<double*
>(A__.at(sddk::memory_t::host)), &ione, &ione,
618 const_cast<ftn_int*
>(A__.descriptor()),
reinterpret_cast<double*
>(eval__),
619 reinterpret_cast<double*
>(Z__.at(sddk::memory_t::host)), &ione, &ione,
620 const_cast<ftn_int*
>(Z__.descriptor()),
reinterpret_cast<double*
>(work.get()), &lwork, iwork.get(),
621 &liwork, &info, (ftn_int)1, (ftn_int)1);
622 }
else if (std::is_same<T, float>::value) {
624 (
"V",
"U", &matrix_size__,
reinterpret_cast<float*
>(A__.at(sddk::memory_t::host)), &ione, &ione,
625 const_cast<ftn_int*
>(A__.descriptor()),
reinterpret_cast<float*
>(eval__),
626 reinterpret_cast<float*
>(Z__.at(sddk::memory_t::host)), &ione, &ione,
627 const_cast<ftn_int*
>(Z__.descriptor()),
reinterpret_cast<float*
>(work.get()), &lwork, iwork.get(),
628 &liwork, &info, (ftn_int)1, (ftn_int)1);
635 PROFILE(
"Eigensolver_scalapack|pdsyevd");
636 return solve_(matrix_size__, A__, eval__, Z__);
641 PROFILE(
"Eigensolver_scalapack|pssyevd");
642 return solve_(matrix_size__, A__, eval__, Z__);
646 template <typename T, typename = std::enable_if_t<std::is_scalar<T>::value>>
650 linalg_base::descinit(desca, matrix_size__, matrix_size__, A__.
bs_row(), A__.
bs_col(), 0, 0,
651 A__.blacs_grid().context(), A__.
ld());
654 linalg_base::descinit(descz, matrix_size__, matrix_size__, Z__.
bs_row(), Z__.
bs_col(), 0, 0,
655 Z__.blacs_grid().context(), Z__.
ld());
664 auto& mph = get_memory_pool(sddk::memory_t::host);
666 auto ifail = mph.get_unique_ptr<ftn_int>(matrix_size__);
667 auto iclustr = mph.get_unique_ptr<ftn_int>(2 * A__.blacs_grid().comm().size());
668 auto gap = mph.get_unique_ptr<T>(A__.blacs_grid().comm().size());
669 auto w = mph.get_unique_ptr<T>(matrix_size__);
677 if (std::is_same<T, double>::value) {
679 (
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<double*
>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
680 reinterpret_cast<double*
>(&d1),
reinterpret_cast<double*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
681 reinterpret_cast<double*
>(w.get()), &ortfac_,
reinterpret_cast<double*
>(Z__.at(sddk::memory_t::host)), &ione, &ione,
682 descz,
reinterpret_cast<double*
>(work3), &lwork, &iwork1, &liwork, ifail.get(), iclustr.get(),
683 reinterpret_cast<double*
>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
684 }
else if (std::is_same<T, float>::value) {
686 (
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<float*
>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
687 reinterpret_cast<float*
>(&d1),
reinterpret_cast<float*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
688 reinterpret_cast<float*
>(w.get()), &ortfac_,
reinterpret_cast<float*
>(Z__.at(sddk::memory_t::host)), &ione, &ione,
689 descz,
reinterpret_cast<float*
>(work3), &lwork, &iwork1, &liwork, ifail.get(), iclustr.get(),
690 reinterpret_cast<float*
>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
693 lwork =
static_cast<ftn_int
>(work3[0]) + 4 * (1 << 20);
696 auto work = mph.get_unique_ptr<T>(lwork);
697 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
699 if (std::is_same<T, double>::value) {
701 (
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<double*
>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
702 reinterpret_cast<double*
>(&d1),
reinterpret_cast<double*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
703 reinterpret_cast<double*
>(w.get()), &ortfac_,
reinterpret_cast<double*
>(Z__.at(sddk::memory_t::host)), &ione, &ione,
704 descz,
reinterpret_cast<double*
>(work.get()), &lwork, iwork.get(), &liwork, ifail.get(), iclustr.get(),
705 reinterpret_cast<double*
>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
706 }
else if (std::is_same<T, float>::value) {
708 (
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<float*
>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
709 reinterpret_cast<float*
>(&d1),
reinterpret_cast<float*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
710 reinterpret_cast<float*
>(w.get()), &ortfac_,
reinterpret_cast<float*
>(Z__.at(sddk::memory_t::host)), &ione, &ione,
711 descz,
reinterpret_cast<float*
>(work.get()), &lwork, iwork.get(), &liwork, ifail.get(), iclustr.get(),
712 reinterpret_cast<float*
>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
715 if ((m != nev__) || (nz != nev__)) {
716 RTE_WARNING(
"Not all eigen-vectors or eigen-values are found.");
721 if ((info / 2) % 2) {
723 s <<
"eigenvectors corresponding to one or more clusters of eigenvalues" << std::endl
724 <<
"could not be reorthogonalized because of insufficient workspace" << std::endl;
726 int k = A__.blacs_grid().comm().size();
727 for (
int i = 0; i < A__.blacs_grid().comm().size() - 1; i++) {
728 if ((iclustr.get()[2 * i + 1] != 0) && (iclustr.get()[2 * (i + 1)] == 0)) {
734 s <<
"number of eigenvalue clusters : " << k << std::endl;
735 for (
int i = 0; i < k; i++) {
736 s << iclustr.get()[2 * i] <<
" : " << iclustr.get()[2 * i + 1] << std::endl;
742 if (std::is_same<T, double>::value) {
743 s <<
"pdsyevx returned " << info;
744 }
else if (std::is_same<T, float>::value) {
745 s <<
"pssyevx returned " << info;
749 std::copy(w.get(), w.get() + nev__, eval__);
757 PROFILE(
"Eigensolver_scalapack|pdsyevx");
758 return solve_(matrix_size__, nev__, A__, eval__, Z__);
763 PROFILE(
"Eigensolver_scalapack|pssyevx");
764 return solve_(matrix_size__, nev__, A__, eval__, Z__);
768 template <typename T, typename = std::enable_if_t<!std::is_scalar<T>::value>>
772 linalg_base::descinit(desca, matrix_size__, matrix_size__, A__.
bs_row(), A__.
bs_col(), 0, 0,
773 A__.blacs_grid().context(), A__.
ld());
776 linalg_base::descinit(descz, matrix_size__, matrix_size__, Z__.
bs_row(), Z__.
bs_col(), 0, 0,
777 Z__.blacs_grid().context(), Z__.
ld());
786 auto& mph = get_memory_pool(sddk::memory_t::host);
788 auto ifail = mph.get_unique_ptr<ftn_int>(matrix_size__);
789 auto iclustr = mph.get_unique_ptr<ftn_int>(2 * A__.blacs_grid().comm().size());
790 auto gap = mph.get_unique_ptr<real_type<T>>(A__.blacs_grid().comm().size());
791 auto w = mph.get_unique_ptr<real_type<T>>(matrix_size__);
795 real_type<T> rwork3[3];
801 if (std::is_same<T, std::complex<double>>::value) {
803 (
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<std::complex<double>*
>(A__.at(sddk::memory_t::host)), &ione, &ione,
804 desca,
reinterpret_cast<double*
>(&d1),
reinterpret_cast<double*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
805 reinterpret_cast<double*
>(w.get()), &ortfac_,
reinterpret_cast<std::complex<double>*
>(Z__.at(sddk::memory_t::host)),
806 &ione, &ione, descz,
reinterpret_cast<std::complex<double>*
>(work3), &lwork,
reinterpret_cast<double*
>(rwork3),
807 &lrwork, &iwork1, &liwork, ifail.get(), iclustr.get(),
reinterpret_cast<double*
>(gap.get()), &info, (ftn_int)1,
808 (ftn_int)1, (ftn_int)1);
809 }
else if (std::is_same<T, std::complex<float>>::value) {
811 (
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<std::complex<float>*
>(A__.at(sddk::memory_t::host)), &ione, &ione,
812 desca,
reinterpret_cast<float*
>(&d1),
reinterpret_cast<float*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
813 reinterpret_cast<float*
>(w.get()), &ortfac_,
reinterpret_cast<std::complex<float>*
>(Z__.at(sddk::memory_t::host)),
814 &ione, &ione, descz,
reinterpret_cast<std::complex<float>*
>(work3), &lwork,
reinterpret_cast<float*
>(rwork3),
815 &lrwork, &iwork1, &liwork, ifail.get(), iclustr.get(),
reinterpret_cast<float*
>(gap.get()), &info, (ftn_int)1,
816 (ftn_int)1, (ftn_int)1);
819 lwork =
static_cast<int32_t
>(work3[0].real()) + (1 << 16);
820 lrwork =
static_cast<int32_t
>(rwork3[0]) + (1 << 16);
823 auto work = mph.get_unique_ptr<T>(lwork);
824 auto rwork = mph.get_unique_ptr<real_type<T>>(lrwork);
825 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
827 if (std::is_same<T, std::complex<double>>::value) {
829 (
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<std::complex<double>*
>(A__.at(sddk::memory_t::host)), &ione, &ione,
830 desca,
reinterpret_cast<double*
>(&d1),
reinterpret_cast<double*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
831 reinterpret_cast<double*
>(w.get()), &ortfac_,
reinterpret_cast<std::complex<double>*
>(Z__.at(sddk::memory_t::host)),
832 &ione, &ione, descz,
reinterpret_cast<std::complex<double>*
>(work.get()), &lwork,
reinterpret_cast<double*
>(rwork.get()),
833 &lrwork, iwork.get(), &liwork, ifail.get(), iclustr.get(),
reinterpret_cast<double*
>(gap.get()), &info,
834 (ftn_int)1, (ftn_int)1, (ftn_int)1);
835 }
else if (std::is_same<T, std::complex<float>>::value) {
837 (
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<std::complex<float>*
>(A__.at(sddk::memory_t::host)), &ione, &ione,
838 desca,
reinterpret_cast<float*
>(&d1),
reinterpret_cast<float*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
839 reinterpret_cast<float*
>(w.get()), &ortfac_,
reinterpret_cast<std::complex<float>*
>(Z__.at(sddk::memory_t::host)),
840 &ione, &ione, descz,
reinterpret_cast<std::complex<float>*
>(work.get()), &lwork,
reinterpret_cast<float*
>(rwork.get()),
841 &lrwork, iwork.get(), &liwork, ifail.get(), iclustr.get(),
reinterpret_cast<float*
>(gap.get()), &info,
842 (ftn_int)1, (ftn_int)1, (ftn_int)1);
845 if ((m != nev__) || (nz != nev__)) {
846 RTE_WARNING(
"Not all eigen-vectors or eigen-values are found.");
851 if ((info / 2) % 2) {
853 s <<
"eigenvectors corresponding to one or more clusters of eigenvalues" << std::endl
854 <<
"could not be reorthogonalized because of insufficient workspace" << std::endl;
856 int k = A__.blacs_grid().comm().size();
857 for (
int i = 0; i < A__.blacs_grid().comm().size() - 1; i++) {
858 if ((iclustr.get()[2 * i + 1] != 0) && (iclustr.get()[2 * (i + 1)] == 0)) {
864 s <<
"number of eigenvalue clusters : " << k << std::endl;
865 for (
int i = 0; i < k; i++) {
866 s << iclustr.get()[2 * i] <<
" : " << iclustr.get()[2 * i + 1] << std::endl;
872 if (std::is_same<T, std::complex<double>>::value) {
873 s <<
"pzheevx returned " << info;
874 }
else if (std::is_same<T, std::complex<float>>::value) {
875 s <<
"pcheevx returned " << info;
879 std::copy(w.get(), w.get() + nev__, eval__);
885 int solve(ftn_int matrix_size__, ftn_int nev__,
dmatrix<std::complex<double>>& A__,
double* eval__,
dmatrix<std::complex<double>>& Z__)
override
887 PROFILE(
"Eigensolver_scalapack|pzheevx");
888 return solve_(matrix_size__, nev__, A__, eval__, Z__);
891 int solve(ftn_int matrix_size__, ftn_int nev__,
dmatrix<std::complex<float>>& A__,
float* eval__,
dmatrix<std::complex<float>>& Z__)
override
893 PROFILE(
"Eigensolver_scalapack|pcheevx");
894 return solve_(matrix_size__, nev__, A__, eval__, Z__);
898 template <typename T, typename = std::enable_if_t<std::is_scalar<T>::value>>
902 linalg_base::descinit(desca, matrix_size__, matrix_size__, A__.
bs_row(), A__.
bs_col(), 0, 0,
903 A__.blacs_grid().context(), A__.
ld());
906 linalg_base::descinit(descb, matrix_size__, matrix_size__, B__.
bs_row(), B__.
bs_col(), 0, 0,
907 B__.blacs_grid().context(), B__.
ld());
910 linalg_base::descinit(descz, matrix_size__, matrix_size__, Z__.
bs_row(), Z__.
bs_col(), 0, 0,
911 Z__.blacs_grid().context(), Z__.
ld());
913 auto& mph = get_memory_pool(sddk::memory_t::host);
914 auto ifail = mph.get_unique_ptr<ftn_int>(matrix_size__);
915 auto iclustr = mph.get_unique_ptr<ftn_int>(2 * A__.blacs_grid().comm().size());
916 auto gap = mph.get_unique_ptr<T>(A__.blacs_grid().comm().size());
917 auto w = mph.get_unique_ptr<T>(matrix_size__);
930 if (std::is_same<T, double>::value) {
932 (&ione,
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<double*
>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
933 reinterpret_cast<double*
>(B__.at(sddk::memory_t::host)), &ione, &ione, descb,
reinterpret_cast<double*
>(&d1),
934 reinterpret_cast<double*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
reinterpret_cast<double*
>(w.get()), &ortfac_,
935 reinterpret_cast<double*
>(Z__.at(sddk::memory_t::host)), &ione, &ione, descz,
reinterpret_cast<double*
>(work1), &lwork, &liwork, &lwork,
936 ifail.get(), iclustr.get(),
reinterpret_cast<double*
>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
937 }
else if (std::is_same<T, float>::value) {
939 (&ione,
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<float*
>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
940 reinterpret_cast<float*
>(B__.at(sddk::memory_t::host)), &ione, &ione, descb,
reinterpret_cast<float*
>(&d1),
941 reinterpret_cast<float*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
reinterpret_cast<float*
>(w.get()), &ortfac_,
942 reinterpret_cast<float*
>(Z__.at(sddk::memory_t::host)), &ione, &ione, descz,
reinterpret_cast<float*
>(work1), &lwork, &liwork, &lwork,
943 ifail.get(), iclustr.get(),
reinterpret_cast<float*
>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
946 lwork =
static_cast<int32_t
>(work1[0]) + 4 * (1 << 20);
948 auto work = mph.get_unique_ptr<T>(lwork);
949 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
951 if (std::is_same<T, double>::value) {
953 (&ione,
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<double*
>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
954 reinterpret_cast<double*
>(B__.at(sddk::memory_t::host)), &ione, &ione, descb,
reinterpret_cast<double*
>(&d1),
955 reinterpret_cast<double*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
reinterpret_cast<double*
>(w.get()), &ortfac_,
956 reinterpret_cast<double*
>(Z__.at(sddk::memory_t::host)), &ione, &ione, descz,
reinterpret_cast<double*
>(work.get()),
957 &lwork, iwork.get(), &liwork, ifail.get(), iclustr.get(),
reinterpret_cast<double*
>(gap.get()), &info,
958 (ftn_int)1, (ftn_int)1, (ftn_int)1);
959 }
else if (std::is_same<T, float>::value) {
961 (&ione,
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<float*
>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
962 reinterpret_cast<float*
>(B__.at(sddk::memory_t::host)), &ione, &ione, descb,
reinterpret_cast<float*
>(&d1),
963 reinterpret_cast<float*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
reinterpret_cast<float*
>(w.get()), &ortfac_,
964 reinterpret_cast<float*
>(Z__.at(sddk::memory_t::host)), &ione, &ione, descz,
reinterpret_cast<float*
>(work.get()),
965 &lwork, iwork.get(), &liwork, ifail.get(), iclustr.get(),
reinterpret_cast<float*
>(gap.get()), &info,
966 (ftn_int)1, (ftn_int)1, (ftn_int)1);
969 if ((m != nev__) || (nz != nev__)) {
970 RTE_WARNING(
"Not all eigen-vectors or eigen-values are found.");
975 if ((info / 2) % 2) {
977 s <<
"eigenvectors corresponding to one or more clusters of eigenvalues" << std::endl
978 <<
"could not be reorthogonalized because of insufficient workspace" << std::endl;
980 int k = A__.blacs_grid().comm().size();
981 for (
int i = 0; i < A__.blacs_grid().comm().size() - 1; i++) {
982 if ((iclustr.get()[2 * i + 1] != 0) && (iclustr.get()[2 * (i + 1)] == 0)) {
988 s <<
"number of eigenvalue clusters : " << k << std::endl;
989 for (
int i = 0; i < k; i++) {
990 s << iclustr.get()[2 * i] <<
" : " << iclustr.get()[2 * i + 1] << std::endl;
996 if (std::is_same<T, double>::value) {
997 s <<
"pdsygvx returned " << info;
998 }
else if (std::is_same<T, float>::value) {
999 s <<
"pssygvx returned " << info;
1003 std::copy(w.get(), w.get() + nev__, eval__);
1011 PROFILE(
"Eigensolver_scalapack|pdsygvx");
1012 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1017 PROFILE(
"Eigensolver_scalapack|pssygvx");
1018 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1022 template <typename T, typename = std::enable_if_t<!std::is_scalar<T>::value>>
1026 linalg_base::descinit(desca, matrix_size__, matrix_size__, A__.
bs_row(), A__.
bs_col(), 0, 0,
1027 A__.blacs_grid().context(), A__.
ld());
1030 linalg_base::descinit(descb, matrix_size__, matrix_size__, B__.
bs_row(), B__.
bs_col(), 0, 0,
1031 B__.blacs_grid().context(), B__.
ld());
1034 linalg_base::descinit(descz, matrix_size__, matrix_size__, Z__.
bs_row(), Z__.
bs_col(), 0, 0,
1035 Z__.blacs_grid().context(), Z__.
ld());
1037 auto& mph = get_memory_pool(sddk::memory_t::host);
1039 auto ifail = mph.get_unique_ptr<ftn_int>(matrix_size__);
1040 auto iclustr = mph.get_unique_ptr<ftn_int>(2 * A__.blacs_grid().comm().size());
1041 auto gap = mph.get_unique_ptr<real_type<T>>(A__.blacs_grid().comm().size());
1042 auto w = mph.get_unique_ptr<real_type<T>>(matrix_size__);
1056 real_type<T> rwork3[3];
1060 if (std::is_same<T, std::complex<double>>::value) {
1062 (&ione,
"V",
"I",
"U", &matrix_size__,
1063 reinterpret_cast<std::complex<double>*
>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
1064 reinterpret_cast<std::complex<double>*
>(B__.at(sddk::memory_t::host)), &ione, &ione, descb,
1065 reinterpret_cast<double*
>(&d1),
reinterpret_cast<double*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
1066 reinterpret_cast<double*
>(w.get()), &ortfac_,
1067 reinterpret_cast<std::complex<double>*
>(Z__.at(sddk::memory_t::host)), &ione, &ione, descz,
1068 reinterpret_cast<std::complex<double>*
>(&work1), &lwork,
reinterpret_cast<double*
>(rwork3),
1069 &lrwork, &iwork1, &liwork, ifail.get(), iclustr.get(),
reinterpret_cast<double*
>(gap.get()),
1070 &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
1071 }
else if (std::is_same<T, std::complex<float>>::value) {
1073 (&ione,
"V",
"I",
"U", &matrix_size__,
1074 reinterpret_cast<std::complex<float>*
>(A__.at(sddk::memory_t::host)), &ione, &ione, desca,
1075 reinterpret_cast<std::complex<float>*
>(B__.at(sddk::memory_t::host)), &ione, &ione, descb,
1076 reinterpret_cast<float*
>(&d1),
reinterpret_cast<float*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
1077 reinterpret_cast<float*
>(w.get()), &ortfac_,
1078 reinterpret_cast<std::complex<float>*
>(Z__.at(sddk::memory_t::host)), &ione, &ione, descz,
1079 reinterpret_cast<std::complex<float>*
>(&work1), &lwork,
reinterpret_cast<float*
>(rwork3), &lrwork, &iwork1,
1080 &liwork, ifail.get(), iclustr.get(),
reinterpret_cast<float*
>(gap.get()), &info, (ftn_int)1,
1081 (ftn_int)1, (ftn_int)1);
1084 lwork = 2 *
static_cast<int32_t
>(work1.real()) + 4096;
1085 lrwork = 2 *
static_cast<int32_t
>(rwork3[0]) + 4096;
1086 liwork = 2 * iwork1 + 4096;
1088 auto work = mph.get_unique_ptr<T>(lwork);
1089 auto rwork = mph.get_unique_ptr<real_type<T>>(lrwork);
1090 auto iwork = mph.get_unique_ptr<ftn_int>(liwork);
1092 if (std::is_same<T, std::complex<double>>::value) {
1094 (&ione,
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<std::complex<double>*
>(A__.at(sddk::memory_t::host)), &ione,
1095 &ione, desca,
reinterpret_cast<std::complex<double>*
>(B__.at(sddk::memory_t::host)), &ione, &ione, descb,
1096 reinterpret_cast<double*
>(&d1),
reinterpret_cast<double*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
1097 reinterpret_cast<double*
>(w.get()), &ortfac_,
reinterpret_cast<std::complex<double>*
>(Z__.at(sddk::memory_t::host)),
1098 &ione, &ione, descz,
reinterpret_cast<std::complex<double>*
>(work.get()), &lwork,
1099 reinterpret_cast<double*
>(rwork.get()), &lrwork, iwork.get(), &liwork, ifail.get(),
1100 iclustr.get(),
reinterpret_cast<double*
>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
1101 }
else if (std::is_same<T, std::complex<float>>::value) {
1103 (&ione,
"V",
"I",
"U", &matrix_size__,
reinterpret_cast<std::complex<float>*
>(A__.at(sddk::memory_t::host)), &ione,
1104 &ione, desca,
reinterpret_cast<std::complex<float>*
>(B__.at(sddk::memory_t::host)), &ione, &ione, descb,
1105 reinterpret_cast<float*
>(&d1),
reinterpret_cast<float*
>(&d1), &ione, &nev__, &abstol_, &m, &nz,
1106 reinterpret_cast<float*
>(w.get()), &ortfac_,
reinterpret_cast<std::complex<float>*
>(Z__.at(sddk::memory_t::host)),
1107 &ione, &ione, descz,
reinterpret_cast<std::complex<float>*
>(work.get()), &lwork,
1108 reinterpret_cast<float*
>(rwork.get()), &lrwork, iwork.get(), &liwork, ifail.get(),
1109 iclustr.get(),
reinterpret_cast<float*
>(gap.get()), &info, (ftn_int)1, (ftn_int)1, (ftn_int)1);
1113 if ((m != nev__) || (nz != nev__)) {
1114 RTE_WARNING(
"Not all eigen-vectors or eigen-values are found.");
1119 if ((info / 2) % 2) {
1120 std::stringstream s;
1121 s <<
"eigenvectors corresponding to one or more clusters of eigenvalues" << std::endl
1122 <<
"could not be reorthogonalized because of insufficient workspace" << std::endl;
1124 int k = A__.blacs_grid().comm().size();
1125 for (
int i = 0; i < A__.blacs_grid().comm().size() - 1; i++) {
1126 if ((iclustr.get()[2 * i + 1] != 0) && (iclustr.get()[2 * (i + 1)] == 0)) {
1132 s <<
"number of eigenvalue clusters : " << k << std::endl;
1133 for (
int i = 0; i < k; i++) {
1134 s << iclustr.get()[2 * i] <<
" : " << iclustr.get()[2 * i + 1] << std::endl;
1139 std::stringstream s;
1140 if (std::is_same<T, std::complex<double>>::value) {
1141 s <<
"pzhegvx returned " << info;
1142 }
else if (std::is_same<T, std::complex<float>>::value) {
1143 s <<
"pchegvx returned " << info;
1147 std::copy(w.get(), w.get() + nev__, eval__);
1153 int solve(ftn_int matrix_size__, ftn_int nev__,
dmatrix<std::complex<double>>& A__,
dmatrix<std::complex<double>>& B__,
1154 double* eval__,
dmatrix<std::complex<double>>& Z__)
override
1156 PROFILE(
"Eigensolver_scalapack|pzhegvx");
1157 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1160 int solve(ftn_int matrix_size__, ftn_int nev__,
dmatrix<std::complex<float>>& A__,
dmatrix<std::complex<float>>& B__,
1161 float* eval__,
dmatrix<std::complex<float>>& Z__)
override
1163 PROFILE(
"Eigensolver_scalapack|pchegvx");
1164 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1168class Eigensolver_scalapack :
public Eigensolver
1171 Eigensolver_scalapack()
1179class Eigensolver_dlaf :
public Eigensolver
1192 template <
typename T>
1193 int solve_(ftn_int matrix_size__, dmatrix<T>& A__, real_type<T>* eval__, dmatrix<T>& Z__)
1195 DLAF_descriptor desca{matrix_size__, matrix_size__, A__.bs_row(), A__.bs_col(), 0, 0, 0, 0,
static_cast<int>(A__.ld())};
1196 DLAF_descriptor descz{matrix_size__, matrix_size__, Z__.bs_row(), Z__.bs_col(), 0, 0, 0, 0,
static_cast<int>(Z__.ld())};
1198 if (std::is_same_v<T, std::complex<double>>) {
1199 return dlaf_hermitian_eigensolver_z(A__.blacs_grid().context(),
'L',
reinterpret_cast<std::complex<double>*
>(A__.at(sddk::memory_t::host)), desca,
reinterpret_cast<double*
>(eval__),
reinterpret_cast<std::complex<double>*
>(Z__.at(sddk::memory_t::host)), descz);
1200 }
else if (std::is_same_v<T, std::complex<float>>) {
1201 return dlaf_hermitian_eigensolver_c(A__.blacs_grid().context(),
'L',
reinterpret_cast<std::complex<float>*
>(A__.at(sddk::memory_t::host)), desca,
reinterpret_cast<float*
>(eval__),
reinterpret_cast<std::complex<float>*
>(Z__.at(sddk::memory_t::host)), descz);
1203 else if (std::is_same_v<T, double>){
1204 return dlaf_symmetric_eigensolver_d(A__.blacs_grid().context(),
'L',
reinterpret_cast<double*
>(A__.at(sddk::memory_t::host)), desca,
reinterpret_cast<double*
>(eval__),
reinterpret_cast<double*
>(Z__.at(sddk::memory_t::host)), descz);
1205 }
else if (std::is_same_v<T, float>){
1206 return dlaf_symmetric_eigensolver_s(A__.blacs_grid().context(),
'L',
reinterpret_cast<float*
>(A__.at(sddk::memory_t::host)), desca,
reinterpret_cast<float*
>(eval__),
reinterpret_cast<float*
>(Z__.at(sddk::memory_t::host)), descz);
1211 int solve(ftn_int matrix_size__, dmatrix<std::complex<double>>& A__,
double* eval__, dmatrix<std::complex<double>>& Z__)
override
1213 PROFILE(
"Eigensolver_dlaf|dlaf_hermitian_eigensolver_z");
1214 return solve_(matrix_size__, A__, eval__, Z__);
1217 int solve(ftn_int matrix_size__, dmatrix<std::complex<float>>& A__,
float* eval__, dmatrix<std::complex<float>>& Z__)
override
1219 PROFILE(
"Eigensolver_dlaf|dlaf_hermitian_eigensolver_c");
1220 return solve_(matrix_size__, A__, eval__, Z__);
1223 int solve(ftn_int matrix_size__, dmatrix<double>& A__,
double* eval__, dmatrix<double>& Z__)
override
1225 PROFILE(
"Eigensolver_dlaf|dlaf_symmetric_eigensolver_d");
1226 return solve_(matrix_size__, A__, eval__, Z__);
1229 int solve(ftn_int matrix_size__, dmatrix<float>& A__,
float* eval__, dmatrix<float>& Z__)
override
1231 PROFILE(
"Eigensolver_dlaf|dlaf_symmetric_eigensolver_s");
1232 return solve_(matrix_size__, A__, eval__, Z__);
1236 template <
typename T>
1237 int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix<T>& A__, real_type<T>* eval__, dmatrix<T>& Z__)
1239 auto& mph = get_memory_pool(sddk::memory_t::host);
1240 auto w = mph.get_unique_ptr<real_type<T>>(matrix_size__);
1242 auto info = solve_(matrix_size__, A__, w.get(), Z__);
1244 std::copy(w.get(), w.get() + nev__, eval__);
1250 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__,
double* eval__, dmatrix<std::complex<double>>& Z__)
override
1252 PROFILE(
"Eigensolver_dlaf|dlaf_hermitian_eigensolver_z");
1253 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1256 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<float>>& A__,
float* eval__, dmatrix<std::complex<float>>& Z__)
override
1258 PROFILE(
"Eigensolver_dlaf|dlaf_hermitian_eigensolver_c");
1259 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1262 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__,
double* eval__, dmatrix<double>& Z__)
override
1264 PROFILE(
"Eigensolver_dlaf|dlaf_symmetric_eigensolver_d");
1265 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1268 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<float>& A__,
float* eval__, dmatrix<float>& Z__)
override
1270 PROFILE(
"Eigensolver_dlaf|dlaf_symmetric_eigensolver_s");
1271 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1275 template <
typename T>
1276 int solve_(ftn_int matrix_size__, dmatrix<T>& A__, dmatrix<T>& B__, real_type<T>* eval__, dmatrix<T>& Z__){
1277 DLAF_descriptor desca{matrix_size__, matrix_size__, A__.bs_row(), A__.bs_col(), 0, 0, 0, 0,
static_cast<int>(A__.ld())};
1278 DLAF_descriptor descb{matrix_size__, matrix_size__, B__.bs_row(), B__.bs_col(), 0, 0, 0, 0,
static_cast<int>(B__.ld())};
1279 DLAF_descriptor descz{matrix_size__, matrix_size__, Z__.bs_row(), Z__.bs_col(), 0, 0, 0, 0,
static_cast<int>(Z__.ld())};
1281 if (std::is_same_v<T, std::complex<double>>) {
1282 return dlaf_hermitian_generalized_eigensolver_z(A__.blacs_grid().context(),
'L',
reinterpret_cast<std::complex<double>*
>(A__.at(sddk::memory_t::host)), desca,
reinterpret_cast<std::complex<double>*
>(B__.at(sddk::memory_t::host)), descb,
reinterpret_cast<double*
>(eval__),
reinterpret_cast<std::complex<double>*
>(Z__.at(sddk::memory_t::host)), descz);
1283 }
else if (std::is_same_v<T, std::complex<float>>) {
1284 return dlaf_hermitian_generalized_eigensolver_c(A__.blacs_grid().context(),
'L',
reinterpret_cast<std::complex<float>*
>(A__.at(sddk::memory_t::host)), desca,
reinterpret_cast<std::complex<float>*
>(B__.at(sddk::memory_t::host)), descb,
reinterpret_cast<float*
>(eval__),
reinterpret_cast<std::complex<float>*
>(Z__.at(sddk::memory_t::host)), descz);
1286 else if (std::is_same_v<T, double>){
1287 return dlaf_symmetric_generalized_eigensolver_d(A__.blacs_grid().context(),
'L',
reinterpret_cast<double*
>(A__.at(sddk::memory_t::host)), desca,
reinterpret_cast<double*
>(B__.at(sddk::memory_t::host)), descb,
reinterpret_cast<double*
>(eval__),
reinterpret_cast<double*
>(Z__.at(sddk::memory_t::host)), descz);
1288 }
else if (std::is_same_v<T, float>){
1289 return dlaf_symmetric_generalized_eigensolver_s(A__.blacs_grid().context(),
'L',
reinterpret_cast<float*
>(A__.at(sddk::memory_t::host)), desca,
reinterpret_cast<float*
>(A__.at(sddk::memory_t::host)), descb,
reinterpret_cast<float*
>(eval__),
reinterpret_cast<float*
>(Z__.at(sddk::memory_t::host)), descz);
1294 int solve(ftn_int matrix_size__, dmatrix<std::complex<double>>& A__, dmatrix<std::complex<double>>& B__,
double* eval__, dmatrix<std::complex<double>>& Z__)
override
1296 PROFILE(
"Eigensolver_dlaf|dlaf_hermitian_generalized_eigensolver_z");
1297 return solve_(matrix_size__, A__, B__, eval__, Z__);
1300 int solve(ftn_int matrix_size__, dmatrix<std::complex<float>>& A__, dmatrix<std::complex<float>>& B__,
float* eval__, dmatrix<std::complex<float>>& Z__)
override
1302 PROFILE(
"Eigensolver_dlaf|dlaf_hermitian_generalized_eigensolver_c");
1303 return solve_(matrix_size__, A__, B__, eval__, Z__);
1306 int solve(ftn_int matrix_size__, dmatrix<double>& A__, dmatrix<double>& B__,
double* eval__, dmatrix<double>& Z__)
override
1308 PROFILE(
"Eigensolver_dlaf|dlaf_symmetric_generalized_eigensolver_d");
1309 return solve_(matrix_size__, A__, B__, eval__, Z__);
1312 int solve(ftn_int matrix_size__, dmatrix<float>& A__, dmatrix<float>& B__,
float* eval__, dmatrix<float>& Z__)
override
1314 PROFILE(
"Eigensolver_dlaf|dlaf_symmetric_generalized_eigensolver_s");
1315 return solve_(matrix_size__, A__, B__, eval__, Z__);
1319 template <
typename T>
1320 int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix<T>& A__, dmatrix<T>& B__, real_type<T>* eval__, dmatrix<T>& Z__){
1321 auto& mph = get_memory_pool(sddk::memory_t::host);
1322 auto w = mph.get_unique_ptr<real_type<T>>(matrix_size__);
1324 auto info = solve_(matrix_size__, A__, B__, w.get(), Z__);
1326 std::copy(w.get(), w.get() + nev__, eval__);
1332 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__, dmatrix<std::complex<double>>& B__,
double* eval__, dmatrix<std::complex<double>>& Z__)
override
1334 PROFILE(
"Eigensolver_dlaf|dlaf_hermitian_generalized_eigensolver_z");
1335 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1338 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<float>>& A__, dmatrix<std::complex<float>>& B__,
float* eval__, dmatrix<std::complex<float>>& Z__)
override
1340 PROFILE(
"Eigensolver_dlaf|dlaf_hermitian_generalized_eigensolver_c");
1341 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1344 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__, dmatrix<double>& B__,
double* eval__, dmatrix<double>& Z__)
override
1346 PROFILE(
"Eigensolver_dlaf|dlaf_symmetric_generalized_eigensolver_d");
1347 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1350 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<float>& A__, dmatrix<float>& B__,
float* eval__, dmatrix<float>& Z__)
override
1352 PROFILE(
"Eigensolver_dlaf|dlaf_symmetric_generalized_eigensolver_s");
1353 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1378 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__, dmatrix<double>& B__,
double* eval__,
1379 dmatrix<double>& Z__)
override
1381 PROFILE(
"Eigensolver_magma|dsygvdx");
1383 int nt = omp_get_max_threads();
1387 auto& mph = get_memory_pool(sddk::memory_t::host);
1388 auto& mphp = get_memory_pool(sddk::memory_t::host_pinned);
1389 auto w = mph.get_unique_ptr<
double>(matrix_size__);
1396 magma_dsyevdx_getworksize(matrix_size__, magma_get_parallel_numthreads(), 1, &lwork, &liwork);
1398 auto h_work = mphp.get_unique_ptr<
double>(lwork);
1399 auto iwork = mph.get_unique_ptr<magma_int_t>(liwork);
1401 magma_dsygvdx_2stage(1, MagmaVec, MagmaRangeI, MagmaLower, matrix_size__, A__.at(sddk::memory_t::host), lda,
1402 B__.at(sddk::memory_t::host), ldb, 0.0, 0.0, 1, nev__, &m, w.get(), h_work.get(), lwork,
1403 iwork.get(), liwork, &info);
1406 if (nt != omp_get_max_threads()) {
1407 RTE_THROW(
"magma has changed the number of threads");
1415 std::copy(w.get(), w.get() + nev__, eval__);
1416 #pragma omp parallel for schedule(static)
1417 for (
int i = 0; i < nev__; i++) {
1418 std::copy(A__.at(sddk::memory_t::host, 0, i), A__.at(sddk::memory_t::host, 0, i) + matrix_size__,
1419 Z__.at(sddk::memory_t::host, 0, i));
1427 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__, dmatrix<std::complex<double>>& B__,
1428 double* eval__, dmatrix<std::complex<double>>& Z__)
override
1430 PROFILE(
"Eigensolver_magma|zhegvdx");
1432 int nt = omp_get_max_threads();
1436 auto& mph = get_memory_pool(sddk::memory_t::host);
1437 auto& mphp = get_memory_pool(sddk::memory_t::host_pinned);
1438 auto w = mph.get_unique_ptr<
double>(matrix_size__);
1446 magma_zheevdx_getworksize(matrix_size__, magma_get_parallel_numthreads(), 1, &lwork, &lrwork, &liwork);
1448 auto h_work = mphp.get_unique_ptr<std::complex<double>>(lwork);
1449 auto rwork = mphp.get_unique_ptr<
double>(lrwork);
1450 auto iwork = mph.get_unique_ptr<magma_int_t>(liwork);
1452 magma_zhegvdx_2stage(1, MagmaVec, MagmaRangeI, MagmaLower, matrix_size__,
1453 reinterpret_cast<magmaDoubleComplex*
>(A__.at(sddk::memory_t::host)), lda,
1454 reinterpret_cast<magmaDoubleComplex*
>(B__.at(sddk::memory_t::host)), ldb, 0.0, 0.0,
1455 1, nev__, &m, w.get(),
reinterpret_cast<magmaDoubleComplex*
>(h_work.get()), lwork,
1456 rwork.get(), lrwork, iwork.get(), liwork, &info);
1458 if (nt != omp_get_max_threads()) {
1459 RTE_THROW(
"magma has changed the number of threads");
1467 std::copy(w.get(), w.get() + nev__, eval__);
1468 #pragma omp parallel for schedule(static)
1469 for (
int i = 0; i < nev__; i++) {
1470 std::copy(A__.at(sddk::memory_t::host, 0, i), A__.at(sddk::memory_t::host, 0, i) + matrix_size__,
1471 Z__.at(sddk::memory_t::host, 0, i));
1479 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__,
double* eval__, dmatrix<double>& Z__)
override
1481 PROFILE(
"Eigensolver_magma|dsygvdx");
1484 if (matrix_size__ <= 128) {
1485 return Eigensolver_lapack().solve(matrix_size__, nev__, A__, eval__, Z__);
1488 auto& mph = get_memory_pool(sddk::memory_t::host);
1489 auto& mphp = get_memory_pool(sddk::memory_t::host_pinned);
1491 int nt = omp_get_max_threads();
1493 auto w = mph.get_unique_ptr<
double>(matrix_size__);
1497 magma_dsyevdx_getworksize(matrix_size__, magma_get_parallel_numthreads(), 1, &lwork, &liwork);
1499 auto h_work = mphp.get_unique_ptr<
double>(lwork);
1500 auto iwork = mph.get_unique_ptr<magma_int_t>(liwork);
1505 magma_dsyevdx(MagmaVec, MagmaRangeI, MagmaLower, matrix_size__, A__.at(sddk::memory_t::host), lda, 0.0, 0.0, 1,
1506 nev__, &m, w.get(), h_work.get(), lwork, iwork.get(), liwork, &info);
1508 if (nt != omp_get_max_threads()) {
1509 RTE_THROW(
"magma has changed the number of threads");
1517 std::copy(w.get(), w.get() + nev__, eval__);
1518 #pragma omp parallel for schedule(static)
1519 for (
int i = 0; i < nev__; i++) {
1520 std::copy(A__.at(sddk::memory_t::host, 0, i), A__.at(sddk::memory_t::host, 0, i) + matrix_size__,
1521 Z__.at(sddk::memory_t::host, 0, i));
1529 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__,
double* eval__, dmatrix<std::complex<double>>& Z__)
override
1531 PROFILE(
"Eigensolver_magma|zheevdx");
1533 int nt = omp_get_max_threads();
1535 auto& mph = get_memory_pool(sddk::memory_t::host);
1536 auto& mphp = get_memory_pool(sddk::memory_t::host_pinned);
1537 auto w = mph.get_unique_ptr<
double>(matrix_size__);
1544 magma_zheevdx_getworksize(matrix_size__, magma_get_parallel_numthreads(), 1, &lwork, &lrwork, &liwork);
1546 auto h_work = mphp.get_unique_ptr<std::complex<double>>(lwork);
1547 auto rwork = mphp.get_unique_ptr<
double>(lrwork);
1548 auto iwork = mph.get_unique_ptr<magma_int_t>(liwork);
1550 magma_zheevdx_2stage(MagmaVec, MagmaRangeI, MagmaLower, matrix_size__,
1551 reinterpret_cast<magmaDoubleComplex*
>(A__.at(sddk::memory_t::host)), lda, 0.0, 0.0, 1,
1552 nev__, &m, w.get(),
reinterpret_cast<magmaDoubleComplex*
>(h_work.get()), lwork, rwork.get(),
1553 lrwork, iwork.get(), liwork, &info);
1555 if (nt != omp_get_max_threads()) {
1556 RTE_THROW(
"magma has changed the number of threads");
1564 std::copy(w.get(), w.get() + nev__, eval__);
1565 #pragma omp parallel for schedule(static)
1566 for (
int i = 0; i < nev__; i++) {
1567 std::copy(A__.at(sddk::memory_t::host, 0, i), A__.at(sddk::memory_t::host, 0, i) + matrix_size__,
1568 Z__.at(sddk::memory_t::host, 0, i));
1576class Eigensolver_magma_gpu:
public Eigensolver
1579 Eigensolver_magma_gpu()
1585 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<std::complex<double>>& A__,
double* eval__,
1586 dmatrix<std::complex<double>>& Z__)
override
1588 PROFILE(
"Eigensolver_magma_gpu|zheevdx");
1590 int nt = omp_get_max_threads();
1592 auto& mph = get_memory_pool(sddk::memory_t::host);
1593 auto& mphp = get_memory_pool(sddk::memory_t::host_pinned);
1594 auto w = mph.get_unique_ptr<
double>(matrix_size__);
1596 acc::copyin(A__.at(sddk::memory_t::device), A__.ld(), A__.at(sddk::memory_t::host), A__.ld(), matrix_size__,
1604 magma_zheevdx_getworksize(matrix_size__, magma_get_parallel_numthreads(), 1, &lwork, &lrwork, &liwork);
1606 int llda = matrix_size__ + 32;
1607 auto z_work = mphp.get_unique_ptr<std::complex<double>>(llda * matrix_size__);
1609 auto h_work = mphp.get_unique_ptr<std::complex<double>>(lwork);
1610 auto rwork = mphp.get_unique_ptr<
double>(lrwork);
1611 auto iwork = mph.get_unique_ptr<magma_int_t>(liwork);
1613 magma_zheevdx_gpu(MagmaVec, MagmaRangeI, MagmaLower, matrix_size__,
1614 reinterpret_cast<magmaDoubleComplex*
>(A__.at(sddk::memory_t::device)), lda, 0.0, 0.0, 1,
1615 nev__, &m, w.get(),
reinterpret_cast<magmaDoubleComplex*
>(z_work.get()), llda,
1616 reinterpret_cast<magmaDoubleComplex*
>(h_work.get()), lwork, rwork.get(), lrwork, iwork.get(),
1619 if (nt != omp_get_max_threads()) {
1620 RTE_THROW(
"magma has changed the number of threads");
1628 std::copy(w.get(), w.get() + nev__, eval__);
1629 acc::copyout(Z__.at(sddk::memory_t::host), Z__.ld(), A__.at(sddk::memory_t::device), A__.ld(),
1630 matrix_size__, nev__);
1637 int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix<double>& A__,
double* eval__, dmatrix<double>& Z__)
override
1639 PROFILE(
"Eigensolver_magma_gpu|dsyevdx");
1641 int nt = omp_get_max_threads();
1643 auto& mph = get_memory_pool(sddk::memory_t::host);
1644 auto& mphp = get_memory_pool(sddk::memory_t::host_pinned);
1645 auto w = mph.get_unique_ptr<
double>(matrix_size__);
1647 acc::copyin(A__.at(sddk::memory_t::device), A__.ld(), A__.at(sddk::memory_t::host), A__.ld(), matrix_size__,
1654 magma_dsyevdx_getworksize(matrix_size__, magma_get_parallel_numthreads(), 1, &lwork, &liwork);
1656 int llda = matrix_size__ + 32;
1657 auto z_work = mphp.get_unique_ptr<
double>(llda * matrix_size__);
1658 auto h_work = mphp.get_unique_ptr<
double>(lwork);
1659 auto iwork = mph.get_unique_ptr<magma_int_t>(liwork);
1661 magma_dsyevdx_gpu(MagmaVec , MagmaRangeI , MagmaLower , matrix_size__ ,
1662 A__.at(sddk::memory_t::device) , lda , 0.0 , 0.0 , 1 ,
1663 nev__ , &m , w.get() , z_work.get() , llda ,
1664 h_work.get() , lwork , iwork.get() , liwork ,
1667 if (nt != omp_get_max_threads()) {
1668 RTE_THROW(
"magma has changed the number of threads");
1676 std::copy(w.get(), w.get() + nev__, eval__);
1677 acc::copyout(Z__.at(sddk::memory_t::host), Z__.ld(), A__.at(sddk::memory_t::device), A__.ld(),
1678 matrix_size__, nev__);
1705#if defined(SIRIUS_CUDA)
1714 template <
typename T>
1715 int solve_(ftn_int matrix_size__,
int nev__,
dmatrix<T>& A__, real_type<T>* eval__,
dmatrix<T>& Z__)
1717 cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR;
1718 cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER;
1719 cusolverEigRange_t range = CUSOLVER_EIG_RANGE_I;
1721 auto& mpd = get_memory_pool(sddk::memory_t::device);
1722 auto w = mpd.get_unique_ptr<real_type<T>>(matrix_size__);
1723 acc::copyin(A__.at(sddk::memory_t::device), A__.
ld(), A__.at(sddk::memory_t::host), A__.
ld(), matrix_size__, matrix_size__);
1727 auto vl = -std::numeric_limits<real_type<T>>::infinity();
1728 auto vu = std::numeric_limits<real_type<T>>::infinity();
1730 if (std::is_same<T, double>::value) {
1731 CALL_CUSOLVER(cusolverDnDsyevdx_bufferSize, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1732 reinterpret_cast<double*
>(A__.at(sddk::memory_t::device)), A__.
ld(),
1733 vl, vu, 1, nev__, &h_meig,
reinterpret_cast<double*
>(w.get()), &lwork));
1734 }
else if (std::is_same<T, float>::value) {
1735 CALL_CUSOLVER(cusolverDnSsyevdx_bufferSize, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1736 reinterpret_cast<float*
>(A__.at(sddk::memory_t::device)), A__.
ld(), vl,
1737 vu, 1, nev__, &h_meig,
reinterpret_cast<float*
>(w.get()), &lwork));
1738 }
else if (std::is_same<T, std::complex<double>>::value) {
1739 CALL_CUSOLVER(cusolverDnZheevdx_bufferSize, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1740 reinterpret_cast<cuDoubleComplex*
>(A__.at(sddk::memory_t::device)),
1741 A__.
ld(), vl, vu, 1, nev__, &h_meig,
reinterpret_cast<double*
>(w.get()), &lwork));
1742 }
else if (std::is_same<T, std::complex<float>>::value) {
1743 CALL_CUSOLVER(cusolverDnCheevdx_bufferSize, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1744 reinterpret_cast<cuFloatComplex*
>(A__.at(sddk::memory_t::device)),
1745 A__.
ld(), vl, vu, 1, nev__, &h_meig,
reinterpret_cast<float*
>(w.get()), &lwork));
1748 auto work = mpd.get_unique_ptr<T>(lwork);
1751 auto dinfo = mpd.get_unique_ptr<
int>(1);
1752 if (std::is_same<T, double>::value) {
1753 CALL_CUSOLVER(cusolverDnDsyevdx, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1754 reinterpret_cast<double*
>(A__.at(sddk::memory_t::device)), A__.
ld(), vl, vu, 1, nev__, &h_meig,
1755 reinterpret_cast<double*
>(w.get()),
reinterpret_cast<double*
>(work.get()), lwork, dinfo.get()));
1756 }
else if (std::is_same<T, float>::value) {
1757 CALL_CUSOLVER(cusolverDnSsyevdx, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1758 reinterpret_cast<float*
>(A__.at(sddk::memory_t::device)), A__.
ld(), vl, vu, 1, nev__, &h_meig,
1759 reinterpret_cast<float*
>(w.get()),
reinterpret_cast<float*
>(work.get()), lwork, dinfo.get()));
1760 }
else if (std::is_same<T, std::complex<double>>::value) {
1761 CALL_CUSOLVER(cusolverDnZheevdx, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1762 reinterpret_cast<cuDoubleComplex*
>(A__.at(sddk::memory_t::device)), A__.
ld(),
1763 vl, vu, 1, nev__, &h_meig,
reinterpret_cast<double*
>(w.get()),
1764 reinterpret_cast<cuDoubleComplex*
>(work.get()), lwork, dinfo.get()));
1765 }
else if (std::is_same<T, std::complex<float>>::value) {
1766 CALL_CUSOLVER(cusolverDnCheevdx, (acc::cusolver::cusolver_handle(), jobz, range, uplo, matrix_size__,
1767 reinterpret_cast<cuFloatComplex*
>(A__.at(sddk::memory_t::device)), A__.
ld(),
1768 vl, vu, 1, nev__, &h_meig,
reinterpret_cast<float*
>(w.get()),
1769 reinterpret_cast<cuFloatComplex*
>(work.get()), lwork, dinfo.get()));
1775 acc::copyout(Z__.at(sddk::memory_t::host), Z__.
ld(), A__.at(sddk::memory_t::device), A__.
ld(), matrix_size__, nev__);
1783 PROFILE(
"Eigensolver_cuda|dsyevdx");
1784 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1789 PROFILE(
"Eigensolver_cuda|ssyevdx");
1790 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1793 int solve(ftn_int matrix_size__,
int nev__, dmatrix<std::complex<float>>& A__,
float* eval__, dmatrix<std::complex<float>>& Z__)
override
1795 PROFILE(
"Eigensolver_cuda|cheevdx");
1796 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1799 int solve(ftn_int matrix_size__,
int nev__, dmatrix<std::complex<double>>& A__,
double* eval__, dmatrix<std::complex<double>>& Z__)
override
1801 PROFILE(
"Eigensolver_cuda|zheevdx");
1802 return solve_(matrix_size__, nev__, A__, eval__, Z__);
1805 template <
typename T>
1806 int solve_(ftn_int matrix_size__,
int nev__, dmatrix<T>& A__, dmatrix<T>& B__, real_type<T>* eval__, dmatrix<T>& Z__)
1808 cusolverEigType_t itype = CUSOLVER_EIG_TYPE_1;
1809 cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR;
1810 cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER;
1811 cusolverEigRange_t range = CUSOLVER_EIG_RANGE_I;
1813 auto& mpd = get_memory_pool(sddk::memory_t::device);
1814 auto w = mpd.get_unique_ptr<real_type<T>>(matrix_size__);
1815 acc::copyin(A__.at(sddk::memory_t::device), A__.ld(), A__.at(sddk::memory_t::host), A__.ld(), matrix_size__, matrix_size__);
1816 acc::copyin(B__.at(sddk::memory_t::device), B__.ld(), B__.at(sddk::memory_t::host), B__.ld(), matrix_size__, matrix_size__);
1820 auto vl = -std::numeric_limits<real_type<T>>::infinity();
1821 auto vu = std::numeric_limits<real_type<T>>::infinity();
1823 if (std::is_same<T, double>::value) {
1824 CALL_CUSOLVER(cusolverDnDsygvdx_bufferSize, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1825 reinterpret_cast<double*
>(A__.at(sddk::memory_t::device)), A__.ld(),
1826 reinterpret_cast<double*
>(B__.at(sddk::memory_t::device)), B__.ld(),
1827 vl, vu, 1, nev__, &h_meig,
reinterpret_cast<double*
>(w.get()), &lwork));
1828 }
else if (std::is_same<T, float>::value) {
1829 CALL_CUSOLVER(cusolverDnSsygvdx_bufferSize, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1830 reinterpret_cast<float*
>(A__.at(sddk::memory_t::device)), A__.ld(),
1831 reinterpret_cast<float*
>(B__.at(sddk::memory_t::device)), B__.ld(),
1832 vl, vu, 1, nev__, &h_meig,
reinterpret_cast<float*
>(w.get()), &lwork));
1833 }
else if (std::is_same<T, std::complex<double>>::value) {
1834 CALL_CUSOLVER(cusolverDnZhegvdx_bufferSize, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1835 reinterpret_cast<cuDoubleComplex*
>(A__.at(sddk::memory_t::device)), A__.ld(),
1836 reinterpret_cast<cuDoubleComplex*
>(B__.at(sddk::memory_t::device)), B__.ld(),
1837 vl, vu, 1, nev__, &h_meig,
reinterpret_cast<double*
>(w.get()), &lwork));
1838 }
else if (std::is_same<T, std::complex<float>>::value) {
1839 CALL_CUSOLVER(cusolverDnChegvdx_bufferSize, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1840 reinterpret_cast<cuFloatComplex*
>(A__.at(sddk::memory_t::device)), A__.ld(),
1841 reinterpret_cast<cuFloatComplex*
>(B__.at(sddk::memory_t::device)), B__.ld(),
1842 vl, vu, 1, nev__, &h_meig,
reinterpret_cast<float*
>(w.get()), &lwork));
1845 auto work = mpd.get_unique_ptr<T>(lwork);
1848 auto dinfo = mpd.get_unique_ptr<
int>(1);
1849 if (std::is_same<T, double>::value) {
1850 CALL_CUSOLVER(cusolverDnDsygvdx, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1851 reinterpret_cast<double*
>(A__.at(sddk::memory_t::device)), A__.ld(),
1852 reinterpret_cast<double*
>(B__.at(sddk::memory_t::device)), B__.ld(),
1853 vl, vu, 1, nev__, &h_meig,
reinterpret_cast<double*
>(w.get()),
1854 reinterpret_cast<double*
>(work.get()), lwork, dinfo.get()));
1855 }
else if (std::is_same<T, float>::value) {
1856 CALL_CUSOLVER(cusolverDnSsygvdx, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1857 reinterpret_cast<float*
>(A__.at(sddk::memory_t::device)), A__.ld(),
1858 reinterpret_cast<float*
>(B__.at(sddk::memory_t::device)), B__.ld(),
1859 vl, vu, 1, nev__, &h_meig,
reinterpret_cast<float*
>(w.get()),
1860 reinterpret_cast<float*
>(work.get()), lwork, dinfo.get()));
1861 }
else if (std::is_same<T, std::complex<double>>::value) {
1862 CALL_CUSOLVER(cusolverDnZhegvdx, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1863 reinterpret_cast<cuDoubleComplex*
>(A__.at(sddk::memory_t::device)), A__.ld(),
1864 reinterpret_cast<cuDoubleComplex*
>(B__.at(sddk::memory_t::device)), B__.ld(),
1865 vl, vu, 1, nev__, &h_meig,
reinterpret_cast<double*
>(w.get()),
1866 reinterpret_cast<cuDoubleComplex*
>(work.get()), lwork, dinfo.get()));
1867 }
else if (std::is_same<T, std::complex<float>>::value) {
1868 CALL_CUSOLVER(cusolverDnChegvdx, (acc::cusolver::cusolver_handle(), itype, jobz, range, uplo, matrix_size__,
1869 reinterpret_cast<cuFloatComplex*
>(A__.at(sddk::memory_t::device)), A__.ld(),
1870 reinterpret_cast<cuFloatComplex*
>(B__.at(sddk::memory_t::device)), B__.ld(),
1871 vl, vu, 1, nev__, &h_meig,
reinterpret_cast<float*
>(w.get()),
1872 reinterpret_cast<cuFloatComplex*
>(work.get()), lwork, dinfo.get()));
1878 acc::copyout(Z__.at(sddk::memory_t::host), Z__.ld(), A__.at(sddk::memory_t::device), A__.ld(), matrix_size__, nev__);
1886 PROFILE(
"Eigensolver_cuda|dsygvdx");
1887 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1892 PROFILE(
"Eigensolver_cuda|ssygvdx");
1893 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1896 int solve(ftn_int matrix_size__,
int nev__, dmatrix<std::complex<double>>& A__, dmatrix<std::complex<double>>& B__,
1897 double* eval__, dmatrix<std::complex<double>>& Z__)
override
1899 PROFILE(
"Eigensolver_cuda|zhegvdx");
1900 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1903 int solve(ftn_int matrix_size__,
int nev__, dmatrix<std::complex<float>>& A__, dmatrix<std::complex<float>>& B__,
1904 float* eval__, dmatrix<std::complex<float>>& Z__)
override
1906 PROFILE(
"Eigensolver_cuda|chegvdx");
1907 return solve_(matrix_size__, nev__, A__, B__, eval__, Z__);
1913 return solve(matrix_size__, matrix_size__, A__, B__, eval__, Z__);
1918 return solve(matrix_size__, matrix_size__, A__, B__, eval__, Z__);
1921 int solve(ftn_int matrix_size__,
dmatrix<std::complex<double>>& A__,
dmatrix<std::complex<double>>& B__,
double* eval__,
1922 dmatrix<std::complex<double>>& Z__)
override
1924 return solve(matrix_size__, matrix_size__, A__, B__, eval__, Z__);
1927 int solve(ftn_int matrix_size__,
dmatrix<std::complex<float>>& A__,
dmatrix<std::complex<float>>& B__,
float* eval__,
1928 dmatrix<std::complex<float>>& Z__)
override
1930 return solve(matrix_size__, matrix_size__, A__, B__, eval__, Z__);
1934class Eigensolver_cuda:
public Eigensolver
int solve(ftn_int matrix_size__, int nev__, dmatrix< float > &A__, float *eval__, dmatrix< float > &Z__) override
wrapper for dynamic binding
int solve(ftn_int matrix_size__, dmatrix< std::complex< float > > &A__, dmatrix< std::complex< float > > &B__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a generalized eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, int nev__, dmatrix< double > &A__, dmatrix< double > &B__, double *eval__, dmatrix< double > &Z__) override
wrapper for dynamic binding
int solve(ftn_int matrix_size__, dmatrix< std::complex< double > > &A__, dmatrix< std::complex< double > > &B__, double *eval__, dmatrix< std::complex< double > > &Z__) override
Solve a generalized eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< float > &A__, dmatrix< float > &B__, float *eval__, dmatrix< float > &Z__) override
Solve a generalized eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< double > &A__, dmatrix< double > &B__, double *eval__, dmatrix< double > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< double > &A__, dmatrix< double > &B__, double *eval__, dmatrix< double > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< float > > &A__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< double > > &A__, dmatrix< std::complex< double > > &B__, double *eval__, dmatrix< std::complex< double > > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix< T > &A__, real_type< T > *eval__, dmatrix< T > &Z__)
Solve a standard eigen-value problem for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix< T > &A__, dmatrix< T > &B__, real_type< T > *eval__, dmatrix< T > &Z__)
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< float > &A__, dmatrix< float > &B__, float *eval__, dmatrix< float > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< double > &A__, double *eval__, dmatrix< double > &Z__) override
wrapper for solving a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< float > &A__, float *eval__, dmatrix< float > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< std::complex< float > > &A__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< double > &A__, dmatrix< double > &B__, double *eval__, dmatrix< double > &Z__) override
wrapper for solving a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< float > &A__, float *eval__, dmatrix< float > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< double > &A__, double *eval__, dmatrix< double > &Z__) override
wrapper for solving a standard eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< std::complex< double > > &A__, double *eval__, dmatrix< std::complex< double > > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< float > > &A__, dmatrix< std::complex< float > > &B__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< double > > &A__, double *eval__, dmatrix< std::complex< double > > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, dmatrix< T > &A__, real_type< T > *eval__, dmatrix< T > &Z__)
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< float > > &A__, dmatrix< std::complex< float > > &B__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix< T > &A__, real_type< T > *eval__, dmatrix< T > &Z__)
Solve a standard eigen-value problem for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, dmatrix< T > &A__, real_type< T > *eval__, dmatrix< T > &Z__)
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< double > &A__, dmatrix< double > &B__, double *eval__, dmatrix< double > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix< T > &A__, dmatrix< T > &B__, real_type< T > *eval__, dmatrix< T > &Z__)
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< double > > &A__, dmatrix< std::complex< double > > &B__, double *eval__, dmatrix< std::complex< double > > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< double > &A__, double *eval__, dmatrix< double > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< std::complex< float > > &A__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< float > &A__, float *eval__, dmatrix< float > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< double > &A__, double *eval__, dmatrix< double > &Z__) override
Solve a standard eigen-value problem for all eigen-pairs.
int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix< T > &A__, dmatrix< T > &B__, T *eval__, dmatrix< T > &Z__)
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< double > > &A__, double *eval__, dmatrix< std::complex< double > > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< float > &A__, dmatrix< float > &B__, float *eval__, dmatrix< float > &Z__) override
Solve a generalized eigen-value problem for N lowest eigen-pairs.
int solve_(ftn_int matrix_size__, ftn_int nev__, dmatrix< T > &A__, T *eval__, dmatrix< T > &Z__)
Solve a standard eigen-value problem for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< std::complex< float > > &A__, float *eval__, dmatrix< std::complex< float > > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
int solve(ftn_int matrix_size__, dmatrix< std::complex< double > > &A__, double *eval__, dmatrix< std::complex< double > > &Z__) override
wrapper for solving a standard eigen-value problem for all eigen-pairs.
int solve(ftn_int matrix_size__, ftn_int nev__, dmatrix< float > &A__, float *eval__, dmatrix< float > &Z__) override
Solve a standard eigen-value problem of a sub-matrix for N lowest eigen-pairs.
Interface to different eigen-solvers.
virtual int solve(ftn_int matrix_size__, dmatrix< double > &A__, double *eval__, dmatrix< double > &Z__)
Solve a standard eigen-value problem for all eigen-pairs.
Eigensolver(ev_solver_t type__, bool is_parallel__, sddk::memory_t host_memory_t__, sddk::memory_t data_memory_t__)
Constructor.
int bs_row() const
Row blocking factor.
int bs_col() const
Column blocking factor.
uint32_t ld() const
Return leading dimension size.
Interface to CUDA eigen-solver library.
Contains definition of eigensolver factory.
Linear algebra interface.
Interface to some of the MAGMA functions.
memory_t
Memory types where the code can store data.
@ host_pinned
Pinned host memory. This is host memory + extra bit flag.
void copyout(T *target__, T const *source__, size_t n__)
Copy memory from device to host.
void copyin(T *target__, T const *source__, size_t n__)
Copy memory from host to device.
void copy(T *target__, T const *source__, size_t n__)
Copy memory inside a device.
ev_solver_t
Type of eigen-value solver.
@ cusolver
CUDA eigen-solver.
@ magma
MAGMA with CPU pointers.
Namespace of the SIRIUS library.
void initialize(bool call_mpi_init__=true)
Initialize the library.
void finalize(bool call_mpi_fin__=true, bool reset_device__=true, bool fftw_cleanup__=true)
Shut down the library.
Add or substitute OMP functions.
Eror and warning handling during run-time execution.