Loading [MathJax]/extensions/TeX/AMSsymbols.js
SIRIUS 7.5.0
Electronic structure library and applications
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
cusolver.hpp
Go to the documentation of this file.
1/** \file cusolver.hpp
2 *
3 * \brief Interface to CUDA eigen-solver library.
4 *
5 */
6
7#ifndef __CUSOLVER_HPP__
8#define __CUSOLVER_HPP__
9
#include "acc.hpp"
#include "SDDK/memory.hpp"
#include <complex>
#include <cstdio>
#include <cusolverDn.h>
#include <unistd.h>
13
14namespace sirius {
15
16namespace acc {
17
18/// Interface to cuSolver library.
19namespace cusolver {
20
21inline void error_message(cusolverStatus_t status)
22{
23 switch (status) {
24 case CUSOLVER_STATUS_NOT_INITIALIZED: {
25 std::printf("the CUDA Runtime initialization failed\n");
26 break;
27 }
28 case CUSOLVER_STATUS_ALLOC_FAILED: {
29 std::printf("the resources could not be allocated\n");
30 break;
31 }
32 case CUSOLVER_STATUS_ARCH_MISMATCH: {
33 std::printf("the device only supports compute capability 2.0 and above\n");
34 break;
35 }
36 case CUSOLVER_STATUS_INVALID_VALUE: {
37 std::printf("An unsupported value or parameter was passed to the function\n");
38 break;
39 }
40 case CUSOLVER_STATUS_EXECUTION_FAILED: {
41 std::printf("The GPU program failed to execute. This is often caused by a launch failure of the kernel on the GPU, which can be caused by multiple reasons.\n");
42 break;
43 }
44 case CUSOLVER_STATUS_INTERNAL_ERROR: {
45 std::printf("An internal cuSolver operation failed. This error is usually caused by a cudaMemcpyAsync() failure.\n");
46 break;
47 }
48 case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: {
49 std::printf("The matrix type is not supported by this function. This is usually caused by passing an invalid matrix descriptor to the function.\n");
50 break;
51 }
52 default: {
53 std::printf("cusolver status unknown\n");
54 }
55 }
56}
57
/// Check the status of a cuSolver call and report diagnostics on failure.
/** Usage: CALL_CUSOLVER(function_name, (arg1, arg2, ...)) — note the parenthesized
    argument list. On any status other than CUSOLVER_STATUS_SUCCESS this prints the
    decoded error message, the host name, the call site (#func__, __LINE__, __FILE__)
    and a stack backtrace. It does NOT abort; execution continues after reporting.
    NOTE(review): gethostname() comes from <unistd.h>; assumed available transitively
    via acc.hpp — confirm. */
#define CALL_CUSOLVER(func__, args__) \
{ \
    cusolverStatus_t status; \
    if ((status = func__ args__) != CUSOLVER_STATUS_SUCCESS) { \
        acc::cusolver::error_message(status); \
        char nm[1024]; \
        gethostname(nm, 1024); \
        std::printf("hostname: %s\n", nm); \
        std::printf("Error in %s at line %i of file %s\n", #func__, __LINE__, __FILE__); \
        acc::stack_backtrace(); \
    } \
}
70
/// Return a reference to the cuSolver handle (defined in the corresponding .cpp file).
cusolverDnHandle_t& cusolver_handle();
/// Create the cuSolver handle.
void create_handle();
/// Destroy the cuSolver handle.
void destroy_handle();
74
/// Type trait mapping a C++ scalar type to the corresponding cudaDataType enumerator.
/** Only the specializations below (float, double and their complex variants) are
    defined; using any other type is a compile-time error. The extraction of this
    header had lost the primary declaration line — restored here so the dangling
    `template <typename>` is well-formed. */
template <typename>
struct type_wrapper;
/// float maps to the real single-precision CUDA data type.
template<>
struct type_wrapper<float>
{
    static constexpr cudaDataType type = CUDA_R_32F;
};
83
/// double maps to the real double-precision CUDA data type.
template<>
struct type_wrapper<double>
{
    static constexpr cudaDataType type = CUDA_R_64F;
};
89
/// std::complex<float> maps to the complex single-precision CUDA data type.
template<>
struct type_wrapper<std::complex<float>>
{
    static constexpr cudaDataType type = CUDA_C_32F;
};
95
/// std::complex<double> maps to the complex double-precision CUDA data type.
template<>
struct type_wrapper<std::complex<double>>
{
    static constexpr cudaDataType type = CUDA_C_64F;
};
101
102template <typename T>
103int potrf(int n__, T* A__, int lda__)
104{
105 int64_t n = n__;
106 int64_t lda = lda__;
107 size_t d_lwork{0};
108 size_t h_lwork{0};
109
110 cublasFillMode_t uplo = CUBLAS_FILL_MODE_UPPER;
111
112 /* work size */
113 CALL_CUSOLVER(cusolverDnXpotrf_bufferSize,
114 (cusolver_handle(), NULL, uplo, n, type_wrapper<T>::type, A__, lda, type_wrapper<T>::type, &d_lwork, &h_lwork));
115
116 auto d_work = get_memory_pool(sddk::memory_t::device).get_unique_ptr<T>(d_lwork);
117 sddk::mdarray<int, 1> info(1);
118 info.allocate(get_memory_pool(sddk::memory_t::device));
119
120 void* hwork{nullptr};
121
122 CALL_CUSOLVER(cusolverDnXpotrf,
123 (cusolver_handle(), NULL, uplo, n, type_wrapper<T>::type, A__, lda, type_wrapper<T>::type, d_work.get(),
124 d_lwork, hwork, h_lwork, info.at(sddk::memory_t::device)));
125
126 info.copy_to(sddk::memory_t::host);
127 return info[0];
128}
129
130template <typename T>
131int trtri(int n__, T* A__, int lda__)
132{
133 int64_t n = n__;
134 int64_t lda = lda__;
135 size_t d_lwork{0};
136 size_t h_lwork{0};
137 /* work size */
138 CALL_CUSOLVER(cusolverDnXtrtri_bufferSize,
139 (cusolver_handle(), CUBLAS_FILL_MODE_UPPER, CUBLAS_DIAG_NON_UNIT, n, type_wrapper<T>::type, A__, lda,
140 &d_lwork, &h_lwork));
141
142 auto h_work = get_memory_pool(sddk::memory_t::host).get_unique_ptr<char>(h_lwork + 1);
143 auto d_work = get_memory_pool(sddk::memory_t::device).get_unique_ptr<char>(d_lwork);
144 sddk::mdarray<int, 1> info(1);
145 info.allocate(get_memory_pool(sddk::memory_t::device));
146
147 CALL_CUSOLVER(cusolverDnXtrtri,
148 (cusolver_handle(), CUBLAS_FILL_MODE_UPPER, CUBLAS_DIAG_NON_UNIT, n, type_wrapper<T>::type,
149 A__, lda, d_work.get(), d_lwork, h_work.get(), h_lwork, info.at(sddk::memory_t::device)));
150 info.copy_to(sddk::memory_t::host);
151 return info[0];
152}
153
154} // namespace cusolver
155
156} // namespace acc
157
158} // namespace sirius
159
160#endif
Interface to accelerators API.
std::unique_ptr< T, memory_t_deleter_base > get_unique_ptr(size_t n__)
Return a unique pointer to the allocated memory.
Definition: memory.hpp:456
Memory management functions and classes.
@ cusolver
CUDA eigen-solver.
Namespace of the SIRIUS library.
Definition: sirius.f90:5