25#ifndef __ACC_COMMON_HPP__
26#define __ACC_COMMON_HPP__
/// Specialization of Real for gpu_complex_type<float>.
/// NOTE(review): the template header and member list are not visible in this listing.
/// real_type reads Real<T>::type, so this presumably exposes a nested `type` -- confirm.
36struct Real<gpu_complex_type<float>>
/// Specialization of Real for gpu_complex_type<double>.
/// NOTE(review): the template header and member list are not visible in this listing.
/// real_type reads Real<T>::type, so this presumably exposes a nested `type` -- confirm.
42struct Real<gpu_complex_type<double>>
48using real_type =
typename Real<T>::type;
/// Numerical value of 2*pi in double precision.
constexpr double twopi = 6.2831853071795864769;
/// Device-only helper taking two int indices (i0, i1) and a leading dimension ld0,
/// declared to return a size_t.
/// NOTE(review): the body is not visible in this listing. The 3D sibling computes
/// i0 + ld0 * (i1 + i2 * ld1), so this presumably returns i0 + i1 * ld0 -- confirm.
54inline __device__
size_t array2D_offset(
int i0,
int i1,
int ld0)
59inline __device__
size_t array3D_offset(
int i0,
int i1,
int i2,
int ld0,
int ld1)
61 return i0 + ld0 * (i1 + i2 * ld1);
64inline __device__
size_t array4D_offset(
int i0,
int i1,
int i2,
int i3,
int ld0,
int ld1,
int ld2)
66 return i0 + ld0 * (i1 + ld1 * (i2 + i3 * ld2));
69inline __host__ __device__
int num_blocks(
int length,
int block_size)
71 return (length / block_size) + ((length % block_size) ? 1 : 0);
/// Device-only overload of add_accNumbers for two double operands; return type is deduced.
/// NOTE(review): the body is not visible in this listing; presumably returns x + y -- confirm.
74inline __device__
auto add_accNumbers(
double x,
double y)
/// Device-only overload of add_accNumbers for two float operands; return type is deduced.
/// NOTE(review): the body is not visible in this listing; presumably returns x + y -- confirm.
79inline __device__
auto add_accNumbers(
float x,
float y)
/// Device-only overload of add_accNumbers for two gpu_complex_type<double> operands.
/// NOTE(review): the body is not visible in this listing. The gpu_complex_type<float>
/// overload forwards to accCaddf, so this presumably forwards to accCadd -- confirm.
84inline __device__
auto add_accNumbers(gpu_complex_type<double> x, gpu_complex_type<double> y)
89inline __device__
auto add_accNumbers(gpu_complex_type<float> x, gpu_complex_type<float> y)
91 return accCaddf(x, y);
/// Device-only overload of sub_accNumbers for two double operands; return type is deduced.
/// NOTE(review): the body is not visible in this listing; presumably returns x - y -- confirm.
94inline __device__
auto sub_accNumbers(
double x,
double y)
/// Device-only overload of sub_accNumbers for two float operands; return type is deduced.
/// NOTE(review): the body is not visible in this listing; presumably returns x - y -- confirm.
99inline __device__
auto sub_accNumbers(
float x,
float y)
104inline __device__
auto sub_accNumbers(gpu_complex_type<double> x, gpu_complex_type<double> y)
106 return accCsub(x, y);
109inline __device__
auto sub_accNumbers(gpu_complex_type<float> x, gpu_complex_type<float> y)
111 return accCsubf(x, y);
114inline __device__
auto make_accComplex(
float x,
float y)
116 return make_accFloatComplex(x, y);
119inline __device__
auto make_accComplex(
double x,
double y)
121 return make_accDoubleComplex(x, y);
124inline __device__
auto mul_accNumbers(gpu_complex_type<double> x, gpu_complex_type<double> y)
126 return accCmul(x, y);
129inline __device__
auto mul_accNumbers(
double x, gpu_complex_type<double> y)
131 return make_accComplex(x * y.x, x * y.y);
134inline __device__
auto mul_accNumbers(gpu_complex_type<float> x, gpu_complex_type<float> y)
136 return accCmulf(x, y);
139inline __device__
auto mul_accNumbers(
float x, gpu_complex_type<float> y)
141 return make_accComplex(x * y.x, x * y.y);
145inline __device__
auto accZero();
/// accZero specialized for double (device-only, deduced return type).
/// NOTE(review): the `template <>` header and the body are not visible in this listing;
/// presumably returns a zero value -- confirm its exact type against the full file.
148inline __device__
auto accZero<double>()
/// accZero specialized for float (device-only, deduced return type).
/// NOTE(review): the `template <>` header and the body are not visible in this listing;
/// presumably returns a zero value -- confirm its exact type against the full file.
154inline __device__
auto accZero<float>()
160inline __device__
auto accZero<gpu_complex_type<double>>()
162 return make_accComplex(
double{0},
double{0});
166inline __device__
auto accZero<gpu_complex_type<float>>()
168 return make_accComplex(
float{0},
float{0});
171inline bool __device__ is_zero(gpu_complex_type<float> x)
173 return (x.x == 0.0) && (x.y == 0);
176inline bool __device__ is_zero(gpu_complex_type<double> x)
178 return (x.x == 0.0) && (x.y == 0);
/// Device-only overload of is_zero for a float argument.
/// NOTE(review): the body is not visible in this listing. The complex overloads compare
/// components exactly against zero, so this presumably returns x == 0 -- confirm.
181inline bool __device__ is_zero(
float x)
/// Device-only overload of is_zero for a double argument.
/// NOTE(review): the body is not visible in this listing. The complex overloads compare
/// components exactly against zero, so this presumably returns x == 0 -- confirm.
186inline bool __device__ is_zero(
double x)
Interface to accelerators API.
Uniform interface to the runtime API of CUDA and ROCm.
Namespace of the SIRIUS library.
Contains typedefs, enums and simple descriptors.