// GPU kernel: each thread reads one complex value from psi_rg__ and adds
// (z.x^2 + z.y^2) * wt__ to the matching element of density_rg__.
//   size__   - integer element count; its use is not visible in this excerpt
//   psi_rg__ - read-only array of gpu_complex_type<T>, indexed by the thread index
// NOTE(review): this listing has lines missing - the template header, the
// declarations of wt__ and density_rg__, the braces and any `ir < size__`
// guard are not shown. Confirm against the complete file.
32__global__
void update_density_rg_1_complex_gpu_kernel(
int size__,
33 gpu_complex_type<T>
const* psi_rg__,
// Flat 1D global index built from the block index, block size and thread index.
37 int ir = blockIdx.x * blockDim.x + threadIdx.x;
39 gpu_complex_type<T> z = psi_rg__[ir];
// Accumulates into element ir; this is |z|^2 * wt__ if .x/.y are the
// real/imaginary parts (presumably - verify against gpu_complex_type).
40 density_rg__[ir] += (z.x * z.x + z.y * z.y) * wt__;
// C-linkage host entry point that launches
// update_density_rg_1_complex_gpu_kernel<double>.
//   size__   - forwarded to the kernel and used to size the launch grid
//   psi_rg__ - read-only array of acc_complex_double_t, forwarded to the kernel
// NOTE(review): the wt__ and density_rg__ parameters and the definition of
// grid_t are on lines missing from this excerpt - confirm in the full file.
45extern "C" void update_density_rg_1_complex_gpu_double(
int size__,
46 acc_complex_double_t
const* psi_rg__,
// Block count computed by num_blocks() from size__ and the x-extent of grid_t.
53 dim3 grid_b(num_blocks(size__, grid_t.x));
// Launch with grid_b blocks of grid_t threads; the two literal zeros are
// presumably the dynamic shared-memory size and the stream - TODO confirm
// against the accLaunchKernel definition.
55 accLaunchKernel((update_density_rg_1_complex_gpu_kernel<double>), dim3(grid_b), dim3(grid_t), 0, 0, size__,
56 psi_rg__, wt__, density_rg__);
// C-linkage host entry point that launches
// update_density_rg_1_complex_gpu_kernel<float>.
//   size__   - forwarded to the kernel and used to size the launch grid
//   psi_rg__ - read-only array of acc_complex_float_t, forwarded to the kernel
// NOTE(review): the wt__ and density_rg__ parameters and the definition of
// grid_t are on lines missing from this excerpt - confirm in the full file.
59extern "C" void update_density_rg_1_complex_gpu_float(
int size__,
60 acc_complex_float_t
const* psi_rg__,
// Block count computed by num_blocks() from size__ and the x-extent of grid_t.
67 dim3 grid_b(num_blocks(size__, grid_t.x));
// Launch with grid_b blocks of grid_t threads; the two literal zeros are
// presumably the dynamic shared-memory size and the stream - TODO confirm
// against the accLaunchKernel definition.
69 accLaunchKernel((update_density_rg_1_complex_gpu_kernel<float>), dim3(grid_b), dim3(grid_t), 0, 0, size__,
70 psi_rg__, wt__, density_rg__);
// GPU kernel: each thread adds p * p * wt__ to element ir of density_rg__.
//   size__ - integer element count; its use is not visible in this excerpt
// NOTE(review): most of this definition is missing from the listing - the
// remaining parameters, the definition of p (presumably the value read from
// the input array at ir - verify) and any `ir < size__` guard are not shown.
74__global__
void update_density_rg_1_real_gpu_kernel(
int size__,
// Flat 1D global index built from the block index, block size and thread index.
79 int ir = blockIdx.x * blockDim.x + threadIdx.x;
// Accumulate the squared value scaled by wt__.
82 density_rg__[ir] += p * p * wt__;
// C-linkage host entry point that launches
// update_density_rg_1_real_gpu_kernel<double>.
//   size__   - used to size the launch grid
//   psi_rg__ - read-only array of double
// NOTE(review): the remaining parameters, the definition of grid_t and the
// kernel argument list of the launch call are cut off in this excerpt.
87extern "C" void update_density_rg_1_real_gpu_double(
int size__,
88 double const* psi_rg__,
// Block count computed by num_blocks() from size__ and the x-extent of grid_t.
95 dim3 grid_b(num_blocks(size__, grid_t.x));
// Launch with grid_b blocks of grid_t threads; the kernel arguments continue
// on lines not shown here.
97 accLaunchKernel((update_density_rg_1_real_gpu_kernel<double>), dim3(grid_b), dim3(grid_t), 0, 0,
// C-linkage host entry point that launches
// update_density_rg_1_real_gpu_kernel<float>.
//   size__   - used to size the launch grid
//   psi_rg__ - read-only array of float
// NOTE(review): the remaining parameters, the definition of grid_t and the
// kernel argument list of the launch call are cut off in this excerpt.
105extern "C" void update_density_rg_1_real_gpu_float(
int size__,
106 float const* psi_rg__,
// Block count computed by num_blocks() from size__ and the x-extent of grid_t.
113 dim3 grid_b(num_blocks(size__, grid_t.x));
// Launch with grid_b blocks of grid_t threads; the kernel arguments continue
// on lines not shown here.
115 accLaunchKernel((update_density_rg_1_real_gpu_kernel<float>), dim3(grid_b), dim3(grid_t), 0, 0,
// Generic (T-parameterised) form of update_density_rg_2_gpu_kernel; versions
// for <double> and <float> appear later in the file.
//   size__      - integer element count
//   psi_up_rg__ - read-only array of gpu_complex_type<T>
//   psi_dn_rg__ - read-only array of gpu_complex_type<T>
// NOTE(review): only part of the parameter list is visible in this excerpt;
// whether this is a bare declaration or carries a body cannot be told from
// here - confirm in the full file.
124__global__
void update_density_rg_2_gpu_kernel(
int size__,
125 gpu_complex_type<T>
const* psi_up_rg__,
126 gpu_complex_type<T>
const* psi_dn_rg__,
// Double-precision version of update_density_rg_2_gpu_kernel. Each thread forms
// z = accCmul(psi_up_rg__[ir], accConj(psi_dn_rg__[ir])) and then adds
// 2 * z.x * wt__ to density_x_rg__[ir] and subtracts 2 * z.y * wt__ from
// density_y_rg__[ir].
//   size__         - integer element count; its use is not visible in this excerpt
//   psi_up_rg__    - read-only array of acc_complex_double_t
//   psi_dn_rg__    - read-only array of acc_complex_double_t
//   density_x_rg__ - array of double updated in place (+=)
//   density_y_rg__ - array of double updated in place (-=)
// NOTE(review): the wt__ parameter, the braces and any `ir < size__` guard are
// on lines missing from this listing - confirm in the full file.
132__global__
void update_density_rg_2_gpu_kernel<double>(
int size__,
133 acc_complex_double_t
const* psi_up_rg__,
134 acc_complex_double_t
const* psi_dn_rg__,
136 double* density_x_rg__,
137 double* density_y_rg__)
// Flat 1D global index built from the block index, block size and thread index.
139 int ir = blockIdx.x * blockDim.x + threadIdx.x;
// Presumably the complex product psi_up * conj(psi_dn) - verify the
// accCmul / accConj helper semantics.
141 acc_complex_double_t z = accCmul(psi_up_rg__[ir], accConj(psi_dn_rg__[ir]));
// Note the opposite signs: x accumulates +2*z.x*wt__, y accumulates -2*z.y*wt__.
142 density_x_rg__[ir] += 2 * z.x * wt__;
143 density_y_rg__[ir] -= 2 * z.y * wt__;
// Single-precision version of update_density_rg_2_gpu_kernel. Each thread forms
// z = accCmulf(psi_up_rg__[ir], accConjf(psi_dn_rg__[ir])) and then adds
// 2 * z.x * wt__ to density_x_rg__[ir] and subtracts 2 * z.y * wt__ from
// density_y_rg__[ir].
//   size__         - integer element count; its use is not visible in this excerpt
//   psi_up_rg__    - read-only array of acc_complex_float_t
//   psi_dn_rg__    - read-only array of acc_complex_float_t
//   density_x_rg__ - array of float updated in place (+=)
//   density_y_rg__ - array of float updated in place (-=)
// NOTE(review): the wt__ parameter, the braces and any `ir < size__` guard are
// on lines missing from this listing - confirm in the full file.
148__global__
void update_density_rg_2_gpu_kernel<float>(
int size__,
149 acc_complex_float_t
const* psi_up_rg__,
150 acc_complex_float_t
const* psi_dn_rg__,
152 float* density_x_rg__,
153 float* density_y_rg__)
// Flat 1D global index built from the block index, block size and thread index.
155 int ir = blockIdx.x * blockDim.x + threadIdx.x;
// Presumably the complex product psi_up * conj(psi_dn) - verify the
// accCmulf / accConjf helper semantics.
157 acc_complex_float_t z = accCmulf(psi_up_rg__[ir], accConjf(psi_dn_rg__[ir]));
// Note the opposite signs: x accumulates +2*z.x*wt__, y accumulates -2*z.y*wt__.
158 density_x_rg__[ir] += 2 * z.x * wt__;
159 density_y_rg__[ir] -= 2 * z.y * wt__;
// C-linkage host entry point that launches update_density_rg_2_gpu_kernel<double>.
//   size__         - used to size the launch grid
//   psi_up_rg__    - read-only array of acc_complex_double_t
//   psi_dn_rg__    - read-only array of acc_complex_double_t
//   density_x_rg__ - array of double
//   density_y_rg__ - array of double
// NOTE(review): the wt__ parameter, the definition of grid_t and the kernel
// argument list of the launch call are cut off in this excerpt.
164extern "C" void update_density_rg_2_gpu_double(
int size__,
165 acc_complex_double_t
const* psi_up_rg__,
166 acc_complex_double_t
const* psi_dn_rg__,
168 double* density_x_rg__,
169 double* density_y_rg__)
// Block count computed by num_blocks() from size__ and the x-extent of grid_t.
174 dim3 grid_b(num_blocks(size__, grid_t.x));
// Launch with grid_b blocks of grid_t threads; the kernel arguments continue
// on lines not shown here.
176 accLaunchKernel((update_density_rg_2_gpu_kernel<double>), dim3(grid_b), dim3(grid_t), 0, 0,
// C-linkage host entry point that launches update_density_rg_2_gpu_kernel<float>.
//   size__         - used to size the launch grid
//   psi_up_rg__    - read-only array of acc_complex_float_t
//   psi_dn_rg__    - read-only array of acc_complex_float_t
//   density_x_rg__ - array of float
//   density_y_rg__ - array of float
// NOTE(review): the wt__ parameter, the definition of grid_t and the kernel
// argument list of the launch call are cut off in this excerpt.
186extern "C" void update_density_rg_2_gpu_float(
int size__,
187 acc_complex_float_t
const* psi_up_rg__,
188 acc_complex_float_t
const* psi_dn_rg__,
190 float* density_x_rg__,
191 float* density_y_rg__)
// Block count computed by num_blocks() from size__ and the x-extent of grid_t.
196 dim3 grid_b(num_blocks(size__, grid_t.x));
// Launch with grid_b blocks of grid_t threads; the kernel arguments continue
// on lines not shown here.
198 accLaunchKernel((update_density_rg_2_gpu_kernel<float>), dim3(grid_b), dim3(grid_t), 0, 0,
Common device functions used by GPU kernels.
Uniform interface to the runtime API of CUDA and ROCm.
Namespace for accelerator-related functions.
Namespace of the SIRIUS library.