// Common device functions used by GPU kernels.
#include "gpu/acc_common.hpp"
// Uniform interface to the runtime API of CUDA and ROCm.
#include "gpu/acc_runtime.hpp"

using namespace sirius;      // namespace of the SIRIUS library
using namespace sirius::acc; // namespace for accelerator-related functions

template <typename T>
__global__ void create_beta_gk_gpu_kernel(int num_gkvec__, int const* beta_desc__,
                                          gpu_complex_type<T> const* beta_gk_t, double const* gkvec,
                                          double const* atom_pos, gpu_complex_type<T>* beta_gk);
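/* For orientation, a minimal sketch of the two helpers used below. The real
 * definitions come from the acc headers; the signatures shown here are
 * assumptions inferred from usage, not the library's exact code:
 *
 *     // column-major offset into a 2D array with leading dimension ld
 *     __host__ __device__ size_t array2D_offset(int i0, int i1, int ld)
 *     {
 *         return i0 + i1 * static_cast<size_t>(ld);
 *     }
 *
 *     // number of thread blocks needed to cover n elements, rounded up
 *     int num_blocks(int n, int block_size)
 *     {
 *         return (n + block_size - 1) / block_size;
 *     }
 */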
template <>
__global__ void create_beta_gk_gpu_kernel<float>(int num_gkvec__, int const* beta_desc__,
                                                 acc_complex_float_t const* beta_gk_t, double const* gkvec,
                                                 double const* atom_pos, acc_complex_float_t* beta_gk)
{
    /* grid y enumerates atoms, grid x covers the G+k vectors */
    int ia  = blockIdx.y;
    int igk = blockDim.x * blockIdx.x + threadIdx.x;

    /* unpack the descriptor of atom ia: number of beta projectors and the
       offsets into the output and atom-type arrays */
    int nbf              = beta_desc__[array2D_offset(0, ia, 4)];
    int offset_beta_gk   = beta_desc__[array2D_offset(1, ia, 4)];
    int offset_beta_gk_t = beta_desc__[array2D_offset(2, ia, 4)];

    if (igk < num_gkvec__) {
        /* phase p = 2*pi * (G+k) . r_a for atom ia */
        double p = 0;
        for (int x = 0; x < 3; x++) {
            p += atom_pos[array2D_offset(x, ia, 3)] * gkvec[array2D_offset(x, igk, 3)];
        }
        p *= twopi;

        float sinp = sin(p);
        float cosp = cos(p);

        /* translate the atom-type projectors to the atomic position: multiply by exp(-i*p) */
        for (int xi = 0; xi < nbf; xi++) {
            beta_gk[array2D_offset(igk, offset_beta_gk + xi, num_gkvec__)] =
                accCmulf(beta_gk_t[array2D_offset(igk, offset_beta_gk_t + xi, num_gkvec__)],
                         make_accFloatComplex(cosp, -sinp));
        }
    }
}
template <>
__global__ void create_beta_gk_gpu_kernel<double>(int num_gkvec__, int const* beta_desc__,
                                                  acc_complex_double_t const* beta_gk_t, double const* gkvec,
                                                  double const* atom_pos, acc_complex_double_t* beta_gk)
{
    int ia  = blockIdx.y;
    int igk = blockDim.x * blockIdx.x + threadIdx.x;

    int nbf              = beta_desc__[array2D_offset(0, ia, 4)];
    int offset_beta_gk   = beta_desc__[array2D_offset(1, ia, 4)];
    int offset_beta_gk_t = beta_desc__[array2D_offset(2, ia, 4)];

    if (igk < num_gkvec__) {
        double p = 0;
        for (int x = 0; x < 3; x++) {
            p += atom_pos[array2D_offset(x, ia, 3)] * gkvec[array2D_offset(x, igk, 3)];
        }
        p *= twopi;

        double sinp = sin(p);
        double cosp = cos(p);

        for (int xi = 0; xi < nbf; xi++) {
            beta_gk[array2D_offset(igk, offset_beta_gk + xi, num_gkvec__)] =
                accCmul(beta_gk_t[array2D_offset(igk, offset_beta_gk_t + xi, num_gkvec__)],
                        make_accDoubleComplex(cosp, -sinp));
        }
    }
}
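/* Both specializations implement the same update: for atom a at position r_a
 * (in lattice coordinates) and plane wave G+k,
 *
 *     beta_a(G+k) = beta_t(G+k) * exp(-i * 2*pi * (G+k) . r_a),
 *
 * i.e. the atom-type projectors beta_gk_t are attached to concrete atomic
 * positions through a structure-factor phase. Only the complex-arithmetic
 * intrinsics (accCmulf vs. accCmul) and the precision of sinp/cosp differ. */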
extern "C" void create_beta_gk_gpu_float(int num_atoms,
                                         int num_gkvec,
                                         int const* beta_desc,
                                         acc_complex_float_t const* beta_gk_t,
                                         double const* gkvec,
                                         double const* atom_pos,
                                         acc_complex_float_t* beta_gk)
{
    /* 1D blocks over the G+k vectors; the second grid dimension enumerates atoms */
    dim3 grid_t(64); /* threads per block (assumed value) */
    dim3 grid_b(num_blocks(num_gkvec, grid_t.x), num_atoms);

    accLaunchKernel((create_beta_gk_gpu_kernel<float>), dim3(grid_b), dim3(grid_t), 0, 0,
                    num_gkvec, beta_desc, beta_gk_t, gkvec, atom_pos, beta_gk);
}
extern "C" void create_beta_gk_gpu_double(int num_atoms,
                                          int num_gkvec,
                                          int const* beta_desc,
                                          acc_complex_double_t const* beta_gk_t,
                                          double const* gkvec,
                                          double const* atom_pos,
                                          acc_complex_double_t* beta_gk)
{
    dim3 grid_t(64); /* threads per block (assumed value) */
    dim3 grid_b(num_blocks(num_gkvec, grid_t.x), num_atoms);

    accLaunchKernel((create_beta_gk_gpu_kernel<double>), dim3(grid_b), dim3(grid_t), 0, 0,
                    num_gkvec, beta_desc, beta_gk_t, gkvec, atom_pos, beta_gk);
}
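/* Hypothetical host-side call, for illustration only. The "d_" buffers and
 * their allocation/transfer are assumptions, not part of this file; beta_desc
 * packs four integers per atom, of which rows 0..2 are read by the kernel as
 * {nbf, offset_beta_gk, offset_beta_gk_t}:
 *
 *     create_beta_gk_gpu_double(num_atoms, num_gkvec, d_beta_desc,
 *                               d_beta_gk_t, d_gkvec, d_atom_pos, d_beta_gk);
 */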