SIRIUS 7.5.0
Electronic structure library and applications
spherical_harmonics.cu
Go to the documentation of this file.
1// Copyright (c) 2013-2019 Anton Kozhevnikov, Thomas Schulthess
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without modification, are permitted provided that
5// the following conditions are met:
6//
7// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
8// following disclaimer.
9// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
10// and the following disclaimer in the documentation and/or other materials provided with the distribution.
11//
12// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
13// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
15// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
16// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
17// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
18// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
20/** \file spherical_harmonics.cu
21 *
 * \brief CUDA kernels to generate spherical harmonics.
23 */
24
25#include <cmath>
28
29using namespace sirius;
30using namespace sirius::acc;
31
/// Flat storage index for a spherical-harmonic coefficient (l, m), -l <= m <= l.
/// Packs the triangular (l, m) table into a linear array: index = l^2 + l + m.
inline __device__ int lmidx(int l, int m)
{
    return (l + 1) * l + m;
}
36
/** \brief Generate the theta-dependent part of complex spherical harmonics Ylm on the GPU.
 *
 *  Launch layout: 1D grid, one thread per (theta, phi) point; threads with itp >= ntp__ exit.
 *
 *  \param [in]  lmax__ Maximum orbital quantum number l.
 *  \param [in]  ntp__  Number of (theta, phi) points.
 *  \param [in]  tp__   Angles packed as consecutive pairs {theta, phi} (2 * ntp__ doubles).
 *  \param [out] ylm__  2D array; column itp receives values addressed by lmidx(l, m).
 *  \param [in]  ld__   Leading dimension of ylm__.
 *
 *  NOTE(review): as written, only the m >= 0 entries are stored, every imaginary part is set
 *  to zero, and phi is never used (its read is commented out) — the exp(i*m*phi) phase and the
 *  m < 0 entries (see the commented-out block below) are not applied here. Confirm the caller
 *  completes the transformation before relying on this kernel for full Ylm.
 */
__global__ void spherical_harmonics_ylm_gpu_kernel(int lmax__, int ntp__, double const* tp__,
                                                   acc_complex_double_t* ylm__, int ld__)
{
    /* one thread handles one (theta, phi) point */
    int itp = blockDim.x * blockIdx.x + threadIdx.x;

    if (itp < ntp__) {
        double theta = tp__[2 * itp];
        // double phi = tp__[2 * itp + 1];
        double sint = sin(theta);
        double cost = cos(theta);

        /* start of column itp in the output array */
        acc_complex_double_t* ylm = &ylm__[array2D_offset(0, itp, ld__)];

        /* l = 0, m = 0 seed: 1 / sqrt(4*pi) (2 * twopi == 4*pi) */
        ylm[0].x = 1.0 / sqrt(2 * twopi);
        ylm[0].y = 0;

        /* diagonal recurrence: (l, l) from (l-1, l-1) */
        for (int l = 1; l <= lmax__; l++) {
            ylm[lmidx(l, l)].x = -sqrt(1 + 1.0 / 2 / l) * sint * ylm[lmidx(l - 1, l - 1)].x;
            ylm[lmidx(l, l)].y = 0;
        }
        /* first sub-diagonal: (l+1, l) from (l, l) */
        for (int l = 0; l < lmax__; l++) {
            ylm[lmidx(l + 1, l)].x = sqrt(2.0 * l + 3) * cost * ylm[lmidx(l, l)].x;
            ylm[lmidx(l + 1, l)].y = 0;
        }
        /* three-term upward recurrence in l for each fixed m */
        for (int m = 0; m <= lmax__ - 2; m++) {
            for (int l = m + 2; l <= lmax__; l++) {
                double alm = sqrt(static_cast<double>((2 * l - 1) * (2 * l + 1)) / (l * l - m * m));
                double blm = sqrt(static_cast<double>((l - 1 - m) * (l - 1 + m)) / ((2 * l - 3) * (2 * l - 1)));
                ylm[lmidx(l, m)].x = alm * (cost * ylm[lmidx(l - 1, m)].x - blm * ylm[lmidx(l - 2, m)].x);
                ylm[lmidx(l, m)].y = 0;
            }
        }

        /* commented-out reference code for applying the exp(i*m*phi) phase and filling m < 0 */
        //for (int m = 0; m <= lmax__; m++) {
        //    acc_double_complex z = std::exp(double_complex(0.0, m * phi)) * std::pow(-1, m);
        //    for (int l = m; l <= lmax; l++) {
        //        ylm[utils::lm(l, m)] = result_array[gsl_sf_legendre_array_index(l, m)] * z;
        //        if (m && m % 2) {
        //            ylm[utils::lm(l, -m)] = -std::conj(ylm[utils::lm(l, m)]);
        //        } else {
        //            ylm[utils::lm(l, -m)] = std::conj(ylm[utils::lm(l, m)]);
        //        }
        //    }
        //}

    }
}
85
/// Host-side launcher for the Ylm kernel: one thread per (theta, phi) point,
/// 32 threads per block, enough blocks to cover all ntp__ points.
extern "C" void spherical_harmonics_ylm_gpu(int lmax__, int ntp__, double const* tp__, acc_complex_double_t* ylm__, int ld__)
{
    dim3 threads(32);
    dim3 blocks(num_blocks(ntp__, threads.x));
    accLaunchKernel((spherical_harmonics_ylm_gpu_kernel), dim3(blocks), dim3(threads), 0, 0, lmax__, ntp__, tp__,
                    ylm__, ld__);
}
93
94
/** \brief Generate real spherical harmonics Rlm on the GPU.
 *
 *  Launch layout: 1D grid, one thread per (theta, phi) point; threads with itp >= ntp__ exit.
 *  First builds the m >= 0 associated-Legendre part by upward recurrence in l, then applies
 *  cos(m*phi) / sin(m*phi) factors (generated by a Chebyshev-style recurrence) to fill the
 *  m > 0 and m < 0 entries.
 *
 *  \param [in]  lmax__  Maximum orbital quantum number l.
 *  \param [in]  ntp__   Number of (theta, phi) points.
 *  \param [in]  theta__ Polar angles, one per point.
 *  \param [in]  phi__   Azimuthal angles, one per point.
 *  \param [out] rlm__   2D array; column itp receives values addressed by lmidx(l, m).
 *  \param [in]  ld__    Leading dimension of rlm__.
 */
__global__ void spherical_harmonics_rlm_gpu_kernel(int lmax__, int ntp__, double const* theta__, double const* phi__,
                                                   double* rlm__, int ld__)
{
    /* one thread handles one (theta, phi) point */
    int itp = blockDim.x * blockIdx.x + threadIdx.x;

    if (itp < ntp__) {
        double theta = theta__[itp];
        double phi = phi__[itp];
        double sint = sin(theta);
        double cost = cos(theta);

        /* start of column itp in the output array */
        double* rlm = &rlm__[array2D_offset(0, itp, ld__)];

        /* l = 0, m = 0 seed: 1 / sqrt(4*pi) (2 * twopi == 4*pi) */
        rlm[0] = 1.0 / sqrt(2 * twopi);

        /* diagonal recurrence: (l, l) from (l-1, l-1) */
        for (int l = 1; l <= lmax__; l++) {
            rlm[lmidx(l, l)] = -sqrt(1 + 1.0 / 2 / l) * sint * rlm[lmidx(l - 1, l - 1)];
        }
        /* first sub-diagonal: (l+1, l) from (l, l) */
        for (int l = 0; l < lmax__; l++) {
            rlm[lmidx(l + 1, l)] = sqrt(2.0 * l + 3) * cost * rlm[lmidx(l, l)];
        }
        /* three-term upward recurrence in l for each fixed m */
        for (int m = 0; m <= lmax__ - 2; m++) {
            for (int l = m + 2; l <= lmax__; l++) {
                double alm = sqrt(static_cast<double>((2 * l - 1) * (2 * l + 1)) / (l * l - m * m));
                double blm = sqrt(static_cast<double>((l - 1 - m) * (l - 1 + m)) / ((2 * l - 3) * (2 * l - 1)));
                rlm[lmidx(l, m)] = alm * (cost * rlm[lmidx(l - 1, m)] - blm * rlm[lmidx(l - 2, m)]);
            }
        }

        /* seeds for the recurrence cos(m*phi) = 2*cos(phi)*cos((m-1)*phi) - cos((m-2)*phi)
         * (and likewise for sin); after the first iteration c1 = cos(phi), s1 = sin(phi) */
        double c0 = cos(phi);
        double c1 = 1;
        double s0 = -sin(phi);
        double s1 = 0;
        double c2 = 2 * c0;

        /* sqrt(2) normalization for the m != 0 real harmonics */
        double const t = sqrt(2.0);

        for (int m = 1; m <= lmax__; m++) {
            /* advance c1 -> cos(m*phi), s1 -> sin(m*phi); order of the rotations matters */
            double c = c2 * c1 - c0;
            c0 = c1;
            c1 = c;
            double s = c2 * s1 - s0;
            s0 = s1;
            s1 = s;
            for (int l = m; l <= lmax__; l++) {
                double p = rlm[lmidx(l, m)];
                rlm[lmidx(l, m)] = t * p * c;
                /* m-parity sign on the sin component — presumably the Condon-Shortley
                 * phase convention; confirm against the CPU reference implementation */
                if (m % 2) {
                    rlm[lmidx(l, -m)] = t * p * s;
                } else {
                    rlm[lmidx(l, -m)] = -t * p * s;
                }
            }
        }
    }
}
151
/// Host-side launcher for the Rlm kernel: one thread per (theta, phi) point,
/// 32 threads per block, enough blocks to cover all ntp__ points.
extern "C" void spherical_harmonics_rlm_gpu(int lmax__, int ntp__, double const* theta__, double const* phi__,
                                            double* rlm__, int ld__)
{
    dim3 threads(32);
    dim3 blocks(num_blocks(ntp__, threads.x));
    accLaunchKernel((spherical_harmonics_rlm_gpu_kernel), dim3(blocks), dim3(threads), 0, 0, lmax__, ntp__, theta__,
                    phi__, rlm__, ld__);
}
Common device functions used by GPU kernels.
Uniform interface to the runtime API of CUDA and ROCm.
Namespace for accelerator-related functions.
Definition: acc.cpp:30
Namespace of the SIRIUS library.
Definition: sirius.f90:5