SIRIUS 7.5.0
Electronic structure library and applications
checksum.cu
Go to the documentation of this file.
1// Copyright (c) 2013-2023 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file checksum.cu
21 *
22 * \brief Compute checksum on GPU.
23 */
24
27#include "core/acc/acc.hpp"
28
29extern sirius::acc_stream_t* streams;
30
31using namespace sirius;
32using namespace sirius::acc;
33
/// Add the sum of each column of a complex 2D array to a per-column running checksum.
/**
 *  Work layout: each thread block handles the column with index blockIdx.x; the threads of the block
 *  stride over the n__ elements of that column. The block needs 2 * blockDim.x * sizeof(T) bytes in the
 *  buffer declared through ACC_DYNAMIC_SHARED (first half: real parts, second half: imaginary parts).
 *
 *  \param [in]     ptr__     Array of complex numbers; element j of column c is read at
 *                            ptr__[array2D_offset(j, c, ld__)].
 *  \param [in]     ld__      Leading dimension forwarded to array2D_offset().
 *  \param [in]     n__       Number of elements summed in each column.
 *  \param [in,out] result__  One entry per thread block; the column sum is added to result__[blockIdx.x].
 */
template <typename T>
__global__ void add_checksum_gpu_kernel(gpu_complex_type<T> const* ptr__, int ld__, int n__,
                                        gpu_complex_type<T>* result__)
{
    ACC_DYNAMIC_SHARED(char, sdata_ptr)
    T* sdata_x = (T*)&sdata_ptr[0];
    T* sdata_y = (T*)&sdata_ptr[blockDim.x * sizeof(T)];

    /* per-thread partial sums; elements are visited in the order j = threadIdx.x, threadIdx.x + blockDim.x, ... */
    T sum_x = T(0);
    T sum_y = T(0);
    for (int j = threadIdx.x; j < n__; j += blockDim.x) {
        auto const k = array2D_offset(j, blockIdx.x, ld__);
        sum_x += ptr__[k].x;
        sum_y += ptr__[k].y;
    }
    sdata_x[threadIdx.x] = sum_x;
    sdata_y[threadIdx.x] = sum_y;
    __syncthreads();

    /* pairwise reduction of the partial sums; the second condition keeps the partner index inside the
       buffer when blockDim.x is not a power of two */
    for (int s = 1; s < blockDim.x; s *= 2) {
        if (threadIdx.x % (2 * s) == 0 && threadIdx.x + s < blockDim.x) {
            sdata_x[threadIdx.x] = sdata_x[threadIdx.x] + sdata_x[threadIdx.x + s];
            sdata_y[threadIdx.x] = sdata_y[threadIdx.x] + sdata_y[threadIdx.x + s];
        }
        /* reached by every thread of the block: the barrier is outside of the conditional */
        __syncthreads();
    }

    /* exactly one thread performs the read-modify-write of the result; if every thread of the block did it,
       threads of different warps could observe an already updated value and add the column sum again */
    if (threadIdx.x == 0) {
        result__[blockIdx.x] = add_accNumbers(result__[blockIdx.x], make_accComplex(sdata_x[0], sdata_y[0]));
    }
}
67
68extern "C" {
69
/// Add column sums of a double-precision complex device array to the running checksums in result__.
/**
 *  Launches add_checksum_gpu_kernel<double> with one thread block per column and 64 threads per block,
 *  passing 0 as the stream argument of accLaunchKernel. The function does not synchronize after the launch.
 *
 *  \param [in]     ptr__     Pointer to the array, forwarded to the kernel.
 *  \param [in]     ld__      Leading dimension of the array.
 *  \param [in]     nrow__    Number of elements summed in each column.
 *  \param [in]     ncol__    Number of columns (one thread block is launched per column).
 *  \param [in,out] result__  Array of ncol__ values; the sum of column i is added to result__[i].
 */
void add_checksum_gpu_double(acc_complex_double_t* ptr__, int ld__, int nrow__, int ncol__, acc_complex_double_t* result__)
{
    /* no columns - nothing to add; a grid with zero (or negative, wrapped to unsigned) blocks
       is not a valid launch configuration */
    if (ncol__ <= 0) {
        return;
    }

    /* number of threads per block */
    dim3 grid_t(64);
    /* one block per column */
    dim3 grid_b(ncol__);

    /* the kernel needs two arrays (real and imaginary partial sums) of grid_t.x elements each */
    accLaunchKernel((add_checksum_gpu_kernel<double>), dim3(grid_b), dim3(grid_t), 2 * grid_t.x * sizeof(double), 0,
                    ptr__, ld__, nrow__, result__);
}
78
/// Add column sums of a single-precision complex device array to the running checksums in result__.
/**
 *  Launches add_checksum_gpu_kernel<float> with one thread block per column and 64 threads per block,
 *  passing 0 as the stream argument of accLaunchKernel. The function does not synchronize after the launch.
 *
 *  \param [in]     ptr__     Pointer to the array, forwarded to the kernel.
 *  \param [in]     ld__      Leading dimension of the array.
 *  \param [in]     nrow__    Number of elements summed in each column.
 *  \param [in]     ncol__    Number of columns (one thread block is launched per column).
 *  \param [in,out] result__  Array of ncol__ values; the sum of column i is added to result__[i].
 */
void add_checksum_gpu_float(acc_complex_float_t* ptr__, int ld__, int nrow__, int ncol__, acc_complex_float_t* result__)
{
    /* no columns - nothing to add; a grid with zero (or negative, wrapped to unsigned) blocks
       is not a valid launch configuration */
    if (ncol__ <= 0) {
        return;
    }

    /* number of threads per block */
    dim3 grid_t(64);
    /* one block per column */
    dim3 grid_b(ncol__);

    /* the kernel needs two arrays (real and imaginary partial sums) of grid_t.x elements each */
    accLaunchKernel((add_checksum_gpu_kernel<float>), dim3(grid_b), dim3(grid_t), 2 * grid_t.x * sizeof(float), 0,
                    ptr__, ld__, nrow__, result__);
}
87
88}
Interface to accelerators API.
Common device functions used by GPU kernels.
Uniform interface to the runtime API of CUDA and ROCm.
Namespace for accelerator-related functions.
Definition: acc.cpp:30
Namespace of the SIRIUS library.
Definition: sirius.f90:5