// GPU kernel: for one atom index `ia` and one local G-vector index `igloc`, multiply veff__[igloc]
// by the complex value built from (cos(p), sin(p)) and store the product in
// veff_a__[array2D_offset(igloc, ia, ld__)], where
//     p = twopi * (ax * gvx + ay * gvy + az * gvz)
// and (ax, ay, az) / (gvx, gvy, gvz) are the atom position and G-vector components read below.
//
// NOTE(review): this listing appears to be incomplete. The declarations of gvx__, gvy__, gvz__,
// num_atoms__ and ld__, the definition of `ia`, and the closing braces are not visible here;
// confirm against the full file before relying on these comments.
31__global__
void mul_veff_with_phase_factors_gpu_kernel(
int num_gvec_loc__,
32 acc_complex_double_t
const* veff__,
37 double const* atom_pos__,
38 acc_complex_double_t* veff_a__,
// Three components of the position of atom `ia`, read from atom_pos__ at second index 0, 1, 2
// with num_atoms__ passed as the leading dimension to array2D_offset.
// NOTE(review): the exact layout implied by array2D_offset is defined elsewhere; verify.
42 double ax = atom_pos__[array2D_offset(ia, 0, num_atoms__)];
43 double ay = atom_pos__[array2D_offset(ia, 1, num_atoms__)];
44 double az = atom_pos__[array2D_offset(ia, 2, num_atoms__)];
// Flat thread index along x selects the local G-vector; the guard below skips indices that are
// not smaller than num_gvec_loc__.
46 int igloc = blockDim.x * blockIdx.x + threadIdx.x;
47 if (igloc < num_gvec_loc__) {
// Integer components of the G-vector with local index igloc.
48 int gvx = gvx__[igloc];
49 int gvy = gvy__[igloc];
50 int gvz = gvz__[igloc];
// Phase angle: twopi times the dot product of the atom position and the G-vector components.
52 double p = twopi * (ax * gvx + ay * gvy + az * gvz);
// Store the accCmul product of the input value and the (cos(p), sin(p)) complex number.
// NOTE(review): presumably this is veff * exp(i*p); confirm the accCmul / make_accDoubleComplex semantics.
54 veff_a__[array2D_offset(igloc, ia, ld__)] = accCmul(veff__[igloc], make_accDoubleComplex(cos(p), sin(p)));
// Host-side entry point with C linkage that launches mul_veff_with_phase_factors_gpu_kernel
// through accLaunchKernel on `stream`.
//
// NOTE(review): this listing appears to be incomplete. Most of the parameter list, the
// definitions of grid_t and stream, and the remaining kernel arguments are not visible here;
// confirm against the full file.
58extern "C" void mul_veff_with_phase_factors_gpu(
int num_atoms__,
60 acc_complex_double_t
const* veff__,
64 double const* atom_pos__,
// 2D grid of blocks: x is num_blocks(num_gvec_loc__, grid_t.x), y is num_atoms__.
// NOTE(review): presumably the kernel takes its atom index from the y block index; the line
// defining `ia` is not visible, so verify.
70 dim3 grid_b(num_blocks(num_gvec_loc__, grid_t.x), num_atoms__);
// Launch with grid_b blocks of grid_t threads on `stream`.
74 accLaunchKernel((mul_veff_with_phase_factors_gpu_kernel), dim3(grid_b), dim3(grid_t), 0,
stream,
82 (acc_complex_double_t*)veff_a__,
Common device functions used by GPU kernels.
Uniform interface to the runtime API of CUDA and ROCm.
Helper class to wrap stream id (integer number).
Namespace for accelerator-related functions.
acc_stream_t stream(stream_id sid__)
Return a single device stream.
Namespace of the SIRIUS library.