SIRIUS 7.5.0
Electronic structure library and applications
acc_common.hpp
Go to the documentation of this file.
1// Copyright (c) 2013-2022 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file acc_common.hpp
21 *
22 * \brief Common device functions used by GPU kernels.
23 */
24
25#ifndef __ACC_COMMON_HPP__
26#define __ACC_COMMON_HPP__
27
28#include <stdio.h>
29#include "acc.hpp"
30#include "acc_runtime.hpp"
31#include "core/typedefs.hpp"
32
33namespace sirius {
34
35template<>
36struct Real<gpu_complex_type<float>>
37{
38 using type = float;
39};
40
41template<>
42struct Real<gpu_complex_type<double>>
43{
44 using type = double;
45};
46
47template <typename T>
48using real_type = typename Real<T>::type;
49
50namespace acc {
51
/// Numerical value of 2*pi in double precision.
/** Declared constexpr so that it is a genuine compile-time constant (usable in constant
 *  expressions and static_assert), not merely a const-qualified object. */
constexpr double twopi = 6.2831853071795864769;
53
54inline __device__ size_t array2D_offset(int i0, int i1, int ld0)
55{
56 return i0 + i1 * ld0;
57}
58
59inline __device__ size_t array3D_offset(int i0, int i1, int i2, int ld0, int ld1)
60{
61 return i0 + ld0 * (i1 + i2 * ld1);
62}
63
64inline __device__ size_t array4D_offset(int i0, int i1, int i2, int i3, int ld0, int ld1, int ld2)
65{
66 return i0 + ld0 * (i1 + ld1 * (i2 + i3 * ld2));
67}
68
69inline __host__ __device__ int num_blocks(int length, int block_size)
70{
71 return (length / block_size) + ((length % block_size) ? 1 : 0);
72}
73
74inline __device__ auto add_accNumbers(double x, double y)
75{
76 return x + y;
77}
78
79inline __device__ auto add_accNumbers(float x, float y)
80{
81 return x + y;
82}
83
84inline __device__ auto add_accNumbers(gpu_complex_type<double> x, gpu_complex_type<double> y)
85{
86 return accCadd(x, y);
87}
88
89inline __device__ auto add_accNumbers(gpu_complex_type<float> x, gpu_complex_type<float> y)
90{
91 return accCaddf(x, y);
92}
93
94inline __device__ auto sub_accNumbers(double x, double y)
95{
96 return x - y;
97}
98
99inline __device__ auto sub_accNumbers(float x, float y)
100{
101 return x - y;
102}
103
104inline __device__ auto sub_accNumbers(gpu_complex_type<double> x, gpu_complex_type<double> y)
105{
106 return accCsub(x, y);
107}
108
109inline __device__ auto sub_accNumbers(gpu_complex_type<float> x, gpu_complex_type<float> y)
110{
111 return accCsubf(x, y);
112}
113
114inline __device__ auto make_accComplex(float x, float y)
115{
116 return make_accFloatComplex(x, y);
117}
118
119inline __device__ auto make_accComplex(double x, double y)
120{
121 return make_accDoubleComplex(x, y);
122}
123
124inline __device__ auto mul_accNumbers(gpu_complex_type<double> x, gpu_complex_type<double> y)
125{
126 return accCmul(x, y);
127}
128
129inline __device__ auto mul_accNumbers(double x, gpu_complex_type<double> y)
130{
131 return make_accComplex(x * y.x, x * y.y);
132}
133
134inline __device__ auto mul_accNumbers(gpu_complex_type<float> x, gpu_complex_type<float> y)
135{
136 return accCmulf(x, y);
137}
138
139inline __device__ auto mul_accNumbers(float x, gpu_complex_type<float> y)
140{
141 return make_accComplex(x * y.x, x * y.y);
142}
143
144template <typename T>
145inline __device__ auto accZero();
146
147template <>
148inline __device__ auto accZero<double>()
149{
150 return 0;
151}
152
153template <>
154inline __device__ auto accZero<float>()
155{
156 return 0;
157}
158
159template <>
160inline __device__ auto accZero<gpu_complex_type<double>>()
161{
162 return make_accComplex(double{0}, double{0});
163}
164
165template <>
166inline __device__ auto accZero<gpu_complex_type<float>>()
167{
168 return make_accComplex(float{0}, float{0});
169}
170
171inline bool __device__ is_zero(gpu_complex_type<float> x)
172{
173 return (x.x == 0.0) && (x.y == 0);
174}
175
176inline bool __device__ is_zero(gpu_complex_type<double> x)
177{
178 return (x.x == 0.0) && (x.y == 0);
179}
180
181inline bool __device__ is_zero(float x)
182{
183 return x == 0.0;
184}
185
186inline bool __device__ is_zero(double x)
187{
188 return x == 0.0;
189}
190
191}
192}
193
194#endif
Interface to accelerators API.
Uniform interface to the runtime API of CUDA and ROCm.
Namespace of the SIRIUS library.
Definition: sirius.f90:5
Contains typedefs, enums and simple descriptors.