// Declaration of the column-scaling kernel. It takes the number of rows, a
// pointer to a matrix of gpu_complex_type<T> elements and a pointer to an
// array of T.
// NOTE(review): `T` is used here but no `template <typename T>` line is
// visible, and the leading "32" looks like a source-listing line number fused
// into the code. Presumably this is the primary template declaration that the
// <double>/<float> kernels specialize -- confirm against the original file.
32__global__
void scale_matrix_columns_gpu_kernel(
int nrow, gpu_complex_type<T>* mtrx, T* a);
// Double-precision column-scaling kernel: the matrix element at (irow, icol)
// is multiplied, via accCmul, by a complex value built from (a[icol], 0).
// NOTE(review): the numeric prefixes (35, 38, 42, ...) look like listing line
// numbers, and the gaps between them indicate missing lines -- the remaining
// parameters (`nrow` and `a` are used below but not declared here), the braces
// and possibly a bounds check on irow. TODO: restore from the original source.
35__global__
void scale_matrix_columns_gpu_kernel<double>
38 acc_complex_double_t* mtrx,
// Column index is taken from the y coordinate of the block.
42 int icol = blockIdx.y;
// Row index is the flat thread index along x.
43 int irow = blockIdx.x * blockDim.x + threadIdx.x;
// In-place update of one element; array2D_offset receives nrow as its third
// argument (presumably the leading dimension -- verify against its definition).
45 mtrx[array2D_offset(irow, icol, nrow)] =
46 accCmul(mtrx[array2D_offset(irow, icol, nrow)], make_accDoubleComplex(a[icol], 0));
// Single-precision column-scaling kernel: the matrix element at (irow, icol)
// is multiplied, via accCmulf, by a complex value built from (a[icol], 0).
// NOTE(review): the numeric prefixes (51, 54, 58, ...) look like listing line
// numbers, and the gaps between them indicate missing lines -- the remaining
// parameters (`nrow` and `a` are used below but not declared here), the braces
// and possibly a bounds check on irow. TODO: restore from the original source.
51__global__
void scale_matrix_columns_gpu_kernel<float>
54 acc_complex_float_t* mtrx,
// Column index is taken from the y coordinate of the block.
58 int icol = blockIdx.y;
// Row index is the flat thread index along x.
59 int irow = blockIdx.x * blockDim.x + threadIdx.x;
// In-place update of one element; array2D_offset receives nrow as its third
// argument (presumably the leading dimension -- verify against its definition).
61 mtrx[array2D_offset(irow, icol, nrow)] =
62 accCmulf(mtrx[array2D_offset(irow, icol, nrow)], make_accFloatComplex(a[icol], 0));
// Host-side entry point with C linkage that launches the double-precision
// column-scaling kernel on the matrix `mtrx`.
// NOTE(review): `ncol`, `a` and `grid_t` are used below but their declarations
// are not in the visible lines (listing numbers 68, 70-72, 74 are absent), and
// the numeric prefixes look like fused listing line numbers. TODO: restore the
// missing parameters, the thread-block definition and the braces.
67extern "C" void scale_matrix_columns_gpu_double(
int nrow,
69 acc_complex_double_t* mtrx,
// Grid shape: num_blocks(nrow, grid_t.x) blocks along x, ncol blocks along y,
// matching the kernel's use of blockIdx.y as the column index.
73 dim3 grid_b(num_blocks(nrow, grid_t.x), ncol);
// The two zeros follow the grid and block dimensions (presumably shared-memory
// size and stream -- confirm against the accLaunchKernel definition).
75 accLaunchKernel((scale_matrix_columns_gpu_kernel<double>), dim3(grid_b), dim3(grid_t), 0, 0, nrow, mtrx, a);
// Host-side entry point with C linkage that launches the single-precision
// column-scaling kernel on the matrix `mtrx`.
// NOTE(review): `ncol`, `a` and `grid_t` are used below but their declarations
// are not in the visible lines (listing numbers 79, 81-83, 85 are absent), and
// the numeric prefixes look like fused listing line numbers. TODO: restore the
// missing parameters, the thread-block definition and the braces.
78extern "C" void scale_matrix_columns_gpu_float(
int nrow,
80 acc_complex_float_t* mtrx,
// Grid shape: num_blocks(nrow, grid_t.x) blocks along x, ncol blocks along y,
// matching the kernel's use of blockIdx.y as the column index.
84 dim3 grid_b(num_blocks(nrow, grid_t.x), ncol);
// The two zeros follow the grid and block dimensions (presumably shared-memory
// size and stream -- confirm against the accLaunchKernel definition).
86 accLaunchKernel((scale_matrix_columns_gpu_kernel<float>), dim3(grid_b), dim3(grid_t), 0, 0, nrow, mtrx, a);
// Row-scaling kernel: both components (x and y) of the complex element at
// (irow, icol) are multiplied by v__[irow], and the result is written back.
// NOTE(review): `nrow__` and `v__` are used below but not declared in the
// visible lines; the numeric prefixes (89, 92, 96, ...) look like fused listing
// line numbers and the gaps suggest missing parameters, braces and possibly a
// bounds check on irow. TODO: restore from the original source.
89__global__
void scale_matrix_rows_gpu_kernel
92 acc_complex_double_t* mtrx__,
// Column index is taken from the y coordinate of the block.
96 int icol = blockIdx.y;
// Row index is the flat thread index along x.
97 int irow = blockDim.x * blockIdx.x + threadIdx.x;
// Load the element once, then store the scaled value to the same location.
99 acc_complex_double_t z = mtrx__[array2D_offset(irow, icol, nrow__)];
100 mtrx__[array2D_offset(irow, icol, nrow__)] = make_accDoubleComplex(z.x * v__[irow], z.y * v__[irow]);
// Host-side entry point with C linkage that launches the row-scaling kernel.
// NOTE(review): `ncol__` and `grid_t` are used below but not declared in the
// visible lines, and the launch call is cut off after "0, 0," -- the kernel
// arguments are missing. The numeric prefixes look like fused listing line
// numbers. TODO: restore the missing lines from the original source.
105extern "C" void scale_matrix_rows_gpu(
int nrow__,
107 acc_complex_double_t* mtrx__,
// Grid shape: num_blocks(nrow__, grid_t.x) blocks along x, ncol__ blocks along
// y, matching the kernel's use of blockIdx.y as the column index.
111 dim3 grid_b(num_blocks(nrow__, grid_t.x), ncol__);
// The two zeros follow the grid and block dimensions (presumably shared-memory
// size and stream -- confirm against the accLaunchKernel definition).
113 accLaunchKernel((scale_matrix_rows_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0,
// Element-scaling kernel: both components (x and y) of the complex element at
// (irow, icol) are multiplied by the same factor beta__ and written back.
// NOTE(review): `ld__` and `beta__` are used below but not declared in the
// visible lines; the numeric prefixes (120, 122, 128, ...) look like fused
// listing line numbers and the gaps suggest missing parameters, braces and
// possibly a bounds check on irow. TODO: restore from the original source.
120__global__
void scale_matrix_elements_gpu_kernel
122 acc_complex_double_t* mtrx__,
// Column index is taken from the y coordinate of the block.
128 int icol = blockIdx.y;
// Row index is the flat thread index along x.
129 int irow = blockDim.x * blockIdx.x + threadIdx.x;
// ld__ is passed as the third argument of array2D_offset (presumably the
// leading dimension of the matrix -- verify against its definition).
131 acc_complex_double_t z = mtrx__[array2D_offset(irow, icol, ld__)];
132 mtrx__[array2D_offset(irow, icol, ld__)] = make_accDoubleComplex(z.x * beta__, z.y * beta__);
// Host-side entry point with C linkage that launches the element-scaling
// kernel for the matrix pointed to by `ptr__`.
// NOTE(review): the signature is cut off after the first parameter; `nrow__`,
// `ncol__` and `grid_t` are used below but not declared in the visible lines,
// and the launch call ends after "0, 0," with the kernel arguments missing.
// The numeric prefixes look like fused listing line numbers. TODO: restore the
// missing lines from the original source.
136extern "C" void scale_matrix_elements_gpu(acc_complex_double_t* ptr__,
// Grid shape: num_blocks(nrow__, grid_t.x) blocks along x, ncol__ blocks along
// y, matching the kernel's use of blockIdx.y as the column index.
143 dim3 grid_b(num_blocks(nrow__, grid_t.x), ncol__);
// The two zeros follow the grid and block dimensions (presumably shared-memory
// size and stream -- confirm against the accLaunchKernel definition).
145 accLaunchKernel((scale_matrix_elements_gpu_kernel), dim3(grid_b), dim3(grid_t), 0, 0,
Common device functions used by GPU kernels.
Uniform interface to the runtime API of CUDA and ROCm.
Namespace for accelerator-related functions.
Namespace of the SIRIUS library.