SIRIUS 7.5.0
Electronic structure library and applications
acc_lapack.hpp
1// Copyright (c) 2023 Simon Pintarelli, Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20#ifndef __ACC_LAPACK_HPP__
21#define __ACC_LAPACK_HPP__
22
#include <stdexcept>
#include <string>

#include "acc_blas.hpp"
#include "core/rte/rte.hpp"
25
26#if defined(SIRIUS_CUDA)
27#include "core/acc/cusolver.hpp"
28#elif defined(SIRIUS_ROCM)
30#endif
31
32namespace sirius {
33
34namespace acc {
35
36/// Interface to accelerated lapack functions.
37namespace lapack {
38
39inline int getrf(int m, int n, acc_complex_double_t* A, int* devIpiv, int lda)
40{
41#if defined (SIRIUS_CUDA)
42 auto& handle = cusolver::cusolver_handle();
43 int* devInfo = acc::allocate<int>(1);
44
45 int lwork;
46 CALL_CUSOLVER(cusolverDnZgetrf_bufferSize, (handle, m, n, A, lda, &lwork));
47 auto workspace = acc::allocate<cuDoubleComplex>(lwork);
48 CALL_CUSOLVER(cusolverDnZgetrf, (handle, m, n, reinterpret_cast<cuDoubleComplex *>(A), lda, workspace, devIpiv, devInfo));
49 acc::deallocate(workspace);
50
51 int cpuInfo;
52 acc::copyout(&cpuInfo, devInfo, 1);
53 acc::deallocate(devInfo);
54 return cpuInfo;
55#elif defined(SIRIUS_ROCM)
56 auto& handle = rocsolver::rocsolver_handle();
57 int cpuInfo;
58 int* devInfo = acc::allocate<int>(1);
59
60 rocsolver::zgetrf(handle, m, n, A, devIpiv, lda, devInfo);
61
62 acc::copyout(&cpuInfo, devInfo, 1);
63 acc::deallocate(devInfo);
64 return cpuInfo;
65#endif
66}
67
68inline int getrs(char trans, int n, int nrhs, const acc_complex_double_t* A, int lda, const int* devIpiv, acc_complex_double_t* B, int ldb)
69{
70#if defined(SIRIUS_CUDA)
71 auto& handle = cusolver::cusolver_handle();
72 int* devInfo = acc::allocate<int>(1);
73
74 cublasOperation_t op = blas::get_gpublasOperation_t(trans);
75
76 CALL_CUSOLVER(cusolverDnZgetrs, (handle, op, n, nrhs, A, lda, devIpiv, B, ldb, devInfo));
77
78 int cpuInfo;
79 acc::copyout(&cpuInfo, devInfo, 1);
80 acc::deallocate(devInfo);
81 if (cpuInfo != 0) {
82 RTE_THROW("Error: cusolver LU solve (Zgetrs) failed. " + std::to_string(cpuInfo));
83 }
84 return cpuInfo;
85#elif defined(SIRIUS_ROCM)
86 auto& handle = rocsolver::rocsolver_handle();
87 rocsolver::zgetrs(handle, trans, n, nrhs, const_cast<acc_complex_double_t*>(A), lda, devIpiv, B, ldb);
88 return 0;
89#endif
90}
91
92} // namespace lapack
93
94} // namespace acc
95
96} // namespace sirius
97
98#endif /* __ACC_LAPACK_HPP__ */
Blas functions for execution on GPUs.
Interface to CUDA eigen-solver library.
void zgetrs(rocblas_handle handle, char trans, int n, int nrhs, acc_complex_double_t *A, int lda, const int *devIpiv, acc_complex_double_t *B, int ldb)
Linear Solvers.
void deallocate(void *ptr__)
Deallocate GPU memory.
Definition: acc.hpp:435
void copyout(T *target__, T const *source__, size_t n__)
Copy memory from device to host.
Definition: acc.hpp:367
Namespace of the SIRIUS library.
Definition: sirius.f90:5
Contains implementation of rocsolver wrappers.
Error and warning handling during run-time execution.