7#ifndef __CUSOLVER_HPP__
8#define __CUSOLVER_HPP__
12#include <cusolverDn.h>
21inline void error_message(cusolverStatus_t status)
24 case CUSOLVER_STATUS_NOT_INITIALIZED: {
25 std::printf(
"the CUDA Runtime initialization failed\n");
28 case CUSOLVER_STATUS_ALLOC_FAILED: {
29 std::printf(
"the resources could not be allocated\n");
32 case CUSOLVER_STATUS_ARCH_MISMATCH: {
33 std::printf(
"the device only supports compute capability 2.0 and above\n");
36 case CUSOLVER_STATUS_INVALID_VALUE: {
37 std::printf(
"An unsupported value or parameter was passed to the function\n");
40 case CUSOLVER_STATUS_EXECUTION_FAILED: {
41 std::printf(
"The GPU program failed to execute. This is often caused by a launch failure of the kernel on the GPU, which can be caused by multiple reasons.\n");
44 case CUSOLVER_STATUS_INTERNAL_ERROR: {
45 std::printf(
"An internal cuSolver operation failed. This error is usually caused by a cudaMemcpyAsync() failure.\n");
48 case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: {
49 std::printf(
"The matrix type is not supported by this function. This is usually caused by passing an invalid matrix descriptor to the function.\n");
53 std::printf(
"cusolver status unknown\n");
/// Invoke a cuSOLVER function and abort with diagnostics on failure.
/// Usage: CALL_CUSOLVER(cusolverDnXpotrf, (handle, ...));
/// On a non-success status it prints the decoded error, the hostname,
/// the failing call site (#func__, __LINE__, __FILE__) and a stack
/// backtrace via acc::stack_backtrace().
#define CALL_CUSOLVER(func__, args__)                                                      \
{                                                                                          \
    cusolverStatus_t status;                                                               \
    if ((status = func__ args__) != CUSOLVER_STATUS_SUCCESS) {                             \
        acc::cusolver::error_message(status);                                              \
        char nm[1024];                                                                     \
        gethostname(nm, 1024);                                                             \
        std::printf("hostname: %s\n", nm);                                                 \
        std::printf("Error in %s at line %i of file %s\n", #func__, __LINE__, __FILE__);   \
        acc::stack_backtrace();                                                            \
    }                                                                                      \
}
71cusolverDnHandle_t& cusolver_handle();
81 static constexpr cudaDataType type = CUDA_R_32F;
87 static constexpr cudaDataType type = CUDA_R_64F;
93 static constexpr cudaDataType type = CUDA_C_32F;
99 static constexpr cudaDataType type = CUDA_C_64F;
103int potrf(
int n__, T* A__,
int lda__)
110 cublasFillMode_t uplo = CUBLAS_FILL_MODE_UPPER;
113 CALL_CUSOLVER(cusolverDnXpotrf_bufferSize,
114 (cusolver_handle(), NULL, uplo, n, type_wrapper<T>::type, A__, lda, type_wrapper<T>::type, &d_lwork, &h_lwork));
116 auto d_work = get_memory_pool(sddk::memory_t::device).
get_unique_ptr<T>(d_lwork);
118 info.allocate(get_memory_pool(sddk::memory_t::device));
120 void* hwork{
nullptr};
122 CALL_CUSOLVER(cusolverDnXpotrf,
123 (cusolver_handle(), NULL, uplo, n, type_wrapper<T>::type, A__, lda, type_wrapper<T>::type, d_work.get(),
124 d_lwork, hwork, h_lwork, info.at(sddk::memory_t::device)));
126 info.copy_to(sddk::memory_t::host);
131int trtri(
int n__, T* A__,
int lda__)
138 CALL_CUSOLVER(cusolverDnXtrtri_bufferSize,
139 (cusolver_handle(), CUBLAS_FILL_MODE_UPPER, CUBLAS_DIAG_NON_UNIT, n, type_wrapper<T>::type, A__, lda,
140 &d_lwork, &h_lwork));
142 auto h_work = get_memory_pool(sddk::memory_t::host).
get_unique_ptr<
char>(h_lwork + 1);
143 auto d_work = get_memory_pool(sddk::memory_t::device).
get_unique_ptr<
char>(d_lwork);
144 sddk::mdarray<int, 1> info(1);
145 info.allocate(get_memory_pool(sddk::memory_t::device));
147 CALL_CUSOLVER(cusolverDnXtrtri,
148 (cusolver_handle(), CUBLAS_FILL_MODE_UPPER, CUBLAS_DIAG_NON_UNIT, n, type_wrapper<T>::type,
149 A__, lda, d_work.get(), d_lwork, h_work.get(), h_lwork, info.at(sddk::memory_t::device)));
150 info.copy_to(sddk::memory_t::host);
/*
 * Interface to accelerators API.
 * get_unique_ptr(size_t n__): return a unique pointer to the allocated memory
 * (see memory management functions and classes).
 * cusolver: CUDA eigen-solver, inside the namespace of the SIRIUS library.
 */